From aa53fe4375255986e021905a15bd182e0b1f9657 Mon Sep 17 00:00:00 2001
From: dramanica
Date: Tue, 15 Oct 2024 20:47:37 +0000
Subject: [PATCH] Deploying to gh-pages from @ EvolEcolGroup/tidysdm@7c84d4337b8a1f191c214f1bd377ff296d6f8b38 🚀
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 dev/articles/a0_tidysdm_overview.html       | 20 ++++++++++----------
 dev/articles/a1_palaeodata_application.html | 10 +++++-----
 dev/articles/a2_tidymodels_additions.html   | 14 +++++++-------
 dev/articles/a3_troubleshooting.html        | 12 ++++++------
 dev/pkgdown.yml                             |  2 +-
 dev/search.json                             |  2 +-
 6 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/dev/articles/a0_tidysdm_overview.html b/dev/articles/a0_tidysdm_overview.html
index c1b4987..cd352e6 100644
--- a/dev/articles/a0_tidysdm_overview.html
+++ b/dev/articles/a0_tidysdm_overview.html
@@ -109,7 +109,7 @@

 SDMs with tidymodels
 #> dplyr::filter() masks stats::filter()
 #> dplyr::lag() masks stats::lag()
 #> recipes::step() masks stats::step()
-#> Dig deeper into tidy modeling with R at https://www.tmwr.org
+#> Use suppressPackageStartupMessages() to eliminate package startup messages
 #> Loading required package: spatialsample
#> Loading required package: terra
-#> terra 1.7.78
+#> terra 1.7.83
 #> 
 #> Attaching package: 'terra'
 #> The following object is masked from 'package:tidyr':
@@ -487,13 +487,13 @@ 

 Fit the model by cross-validation
 )
 #> i No tuning parameters. `fit_resamples()` will be attempted
 #> i 1 of 4 resampling: default_glm
-#> 1 of 4 resampling: default_glm (193ms)
+#> 1 of 4 resampling: default_glm (196ms)
 #> i 2 of 4 tuning: default_rf
 #> i Creating pre-processing data to finalize unknown parameter: mtry
-#> 2 of 4 tuning: default_rf (828ms)
+#> 2 of 4 tuning: default_rf (826ms)
 #> i 3 of 4 tuning: default_gbm
 #> i Creating pre-processing data to finalize unknown parameter: mtry
-#> 3 of 4 tuning: default_gbm (3.9s)
+#> 3 of 4 tuning: default_gbm (4s)
 #> i 4 of 4 tuning: default_maxent
 #> 4 of 4 tuning: default_maxent (1.2s)
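For context, a progress log like the hunk above is produced by tuning a tidysdm workflow_set across spatial cross-validation folds. Below is a minimal sketch of the call, mirroring the overview vignette this page is built from; lacerta_rec (the recipe) and lacerta_thin (the thinned presence/background data) are defined earlier in that vignette:

library(tidysdm) # also loads tidymodels and spatialsample

# pair one recipe with four model specs; specs without tuning parameters
# (here the glm) fall back to fit_resamples()
lacerta_models <- workflow_set(
  preproc = list(default = lacerta_rec),
  models = list(
    glm = sdm_spec_glm(),
    rf = sdm_spec_rf(), # mtry is finalized from the data, hence the message
    gbm = sdm_spec_boost_tree(),
    maxent = sdm_spec_maxent()
  ),
  cross = TRUE
) %>%
  option_add(control = control_ensemble_grid())

# three spatial blocks for cross-validation
set.seed(100)
lacerta_cv <- spatial_block_cv(data = lacerta_thin, v = 3, n = 5)

# tune over the folds; verbose = TRUE prints the numbered progress lines
# and per-workflow timings captured in the diff
lacerta_models <- lacerta_models %>%
  workflow_map("tune_grid",
    resamples = lacerta_cv, grid = 3,
    metrics = sdm_metric_set(), verbose = TRUE
  )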

 Note that workflow_set correctly detects that we have no
@@ -811,17 +811,17 @@

 Repeated ensembles
 #> i 1 of 2 resampling: default_glm
 #> 1 of 2 resampling: default_glm (228ms)
 #> i 2 of 2 tuning: default_maxent
-#> 2 of 2 tuning: default_maxent (6.8s)
+#> 2 of 2 tuning: default_maxent (7.2s)
 #> i No tuning parameters. `fit_resamples()` will be attempted
 #> i 1 of 2 resampling: default_glm
-#> 1 of 2 resampling: default_glm (228ms)
+#> 1 of 2 resampling: default_glm (225ms)
 #> i 2 of 2 tuning: default_maxent
-#> 2 of 2 tuning: default_maxent (7.1s)
+#> 2 of 2 tuning: default_maxent (6.9s)
 #> i No tuning parameters. `fit_resamples()` will be attempted
 #> i 1 of 2 resampling: default_glm
-#> 1 of 2 resampling: default_glm (235ms)
+#> 1 of 2 resampling: default_glm (229ms)
 #> i 2 of 2 tuning: default_maxent
-#> 2 of 2 tuning: default_maxent (7.2s)
+#> 2 of 2 tuning: default_maxent (7.3s)

Now we can create a repeat_ensemble from the list:

 lacerta_rep_ens <- repeat_ensemble() %>% add_repeat(ensemble_list)
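The construction of ensemble_list is not shown in this hunk. A hedged sketch of one way to build it, assuming lacerta_models_list (an illustrative name, not from the vignette) holds one tuned workflow_set per repeat of the background sampling:

# one simple_ensemble per repeat, each keeping the best member by the
# Boyce continuous index (the metric used throughout these articles);
# the resulting ensemble_list is then passed to add_repeat() as above
ensemble_list <- lapply(lacerta_models_list, function(wf_set) {
  simple_ensemble() %>% add_member(wf_set, metric = "boyce_cont")
})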
diff --git a/dev/articles/a1_palaeodata_application.html b/dev/articles/a1_palaeodata_application.html
index a5026bc..9527744 100644
--- a/dev/articles/a1_palaeodata_application.html
+++ b/dev/articles/a1_palaeodata_application.html
@@ -95,7 +95,7 @@ 

 SDMs with tidymodels for palaeo da
 #> dplyr::filter() masks stats::filter()
 #> dplyr::lag() masks stats::lag()
 #> recipes::step() masks stats::step()
-#> Learn how to get started at https://www.tidymodels.org/start/
+#> Use tidymodels_prefer() to resolve common conflicts.
 #> Loading required package: spatialsample

+#> 4 of 4 tuning: default_gbm (15.2s)

Note that workflow_set correctly detects that we have no tuning parameters for glm and gam. We can have a look at the performance of our models with:
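The performance summaries the text refers to come from the standard workflowsets helpers; a short sketch, assuming lacerta_models is the tuned workflow_set from this article:

autoplot(lacerta_models) # one panel per metric, one point per workflow
collect_metrics(lacerta_models) # tabular summary across folds
rank_results(lacerta_models, rank_metric = "boyce_cont") # order by Boyce index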

diff --git a/dev/articles/a2_tidymodels_additions.html b/dev/articles/a2_tidymodels_additions.html
index 26e03b6..d152431 100644
--- a/dev/articles/a2_tidymodels_additions.html
+++ b/dev/articles/a2_tidymodels_additions.html
@@ -108,7 +108,7 @@

 Exploring models with DALEX
 #> dplyr::filter() masks stats::filter()
 #> dplyr::lag() masks stats::lag()
 #> recipes::step() masks stats::step()
-#> Learn how to get started at https://www.tidymodels.org/start/
+#> Use tidymodels_prefer() to resolve common conflicts.
 #> Loading required package: spatialsample
 lacerta_ensemble
 #> A simple_ensemble of models
@@ -395,12 +395,12 @@

 Different recipes for certain mode
 )
 #> i No tuning parameters. `fit_resamples()` will be attempted
 #> i 1 of 3 resampling: uncor_glm
-#> 1 of 3 resampling: uncor_glm (316ms)
+#> 1 of 3 resampling: uncor_glm (317ms)
 #> i 2 of 3 tuning: all_rf
 #> ! No improvement for 10 iterations; returning current results.
-#> 2 of 3 tuning: all_rf (16s)
+#> 2 of 3 tuning: all_rf (16.1s)
 #> i 3 of 3 tuning: all_svm
-#> 3 of 3 tuning: all_svm (21.2s)
+#> 3 of 3 tuning: all_svm (20.7s)

We can have a look at the performance of our models with:

 autoplot(lacerta_models)
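Individual members of the set can also be pulled out with the workflowsets and tune helpers; a sketch using the all_rf id from the hunk above (metric name as used elsewhere in these articles):

rf_res <- extract_workflow_set_result(lacerta_models, id = "all_rf")
show_best(rf_res, metric = "boyce_cont", n = 3) # top hyperparameter combinations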
@@ -589,10 +589,10 @@

 Using multi-level factors as pr
 )
 #> i No tuning parameters. `fit_resamples()` will be attempted
 #> i 1 of 2 resampling: default_glm
-#> 1 of 2 resampling: default_glm (192ms)
+#> 1 of 2 resampling: default_glm (196ms)
 #> i 2 of 2 tuning: default_rf
 #> i Creating pre-processing data to finalize unknown parameter: mtry
-#> 2 of 2 tuning: default_rf (967ms)
+#> 2 of 2 tuning: default_rf (963ms)
 # fit the ensemble
 lacerta_ensemble <- simple_ensemble() %>%
   add_member(lacerta_models, metric = "boyce_cont")
@@ -630,7 +630,7 @@

 Using multi-level factors as pr
 climate_present$topography <- climate_present$altitude
 climate_present$topography <- terra::classify(climate_present$topography,
   rcl = c(-Inf, 200, 800, Inf), include.lowest=TRUE, brackets=TRUE)
 library(terra)
-#> terra 1.7.78
+#> terra 1.7.83
 #>
 #> Attaching package: 'terra'
 #> The following objects are masked from 'package:kernlab':
diff --git a/dev/articles/a3_troubleshooting.html b/dev/articles/a3_troubleshooting.html
index 8873a04..c5c49cf 100644
--- a/dev/articles/a3_troubleshooting.html
+++ b/dev/articles/a3_troubleshooting.html
@@ -94,7 +94,7 @@

 NAs in the data
 #> dplyr::filter() masks stats::filter()
 #> dplyr::lag() masks stats::lag()
 #> recipes::step() masks stats::step()
-#> Use suppressPackageStartupMessages() to eliminate package startup messages
+#> Use tidymodels_prefer() to resolve common conflicts.
 #> Loading required package: spatialsample
 lacerta_thin <- readRDS(system.file("extdata/lacerta_climate_sf.RDS",
   package = "tidysdm"
@@ -135,7 +135,7 @@

 NAs in the data
 )
 #> i No tuning parameters. `fit_resamples()` will be attempted
 #> i 1 of 2 resampling: default_glm
-#> 1 of 2 resampling: default_glm (320ms)
+#> 1 of 2 resampling: default_glm (329ms)
 #> i 2 of 2 tuning: default_rf
 #> i Creating pre-processing data to finalize unknown parameter: mtry
 #> → A | error: Missing data in columns: bio05.
@@ -238,7 +238,7 @@

 Recipes and the response variable
 #> → A | error: ! `rand_forest()` was unable to find an outcome.
 #> Ensure that you have specified an outcome column and that it hasn't been
 #> removed in pre-processing.
-#> There were issues with some computations A: x2
+#> There were issues with some computations A: x1
 #> There were issues with some computations A: x15
 #>
 #> Warning: All models failed. Run `show_notes(.Last.tune.result)` for more information.
@@ -327,7 +327,7 @@

 Using the desired formula with GAM
 )
 #> i No tuning parameters. `fit_resamples()` will be attempted
 #> i 1 of 2 resampling: default_glm
-#> 1 of 2 resampling: default_glm (271ms)
+#> 1 of 2 resampling: default_glm (270ms)
 #> i No tuning parameters. `fit_resamples()` will be attempted
 #> i 2 of 2 resampling: default_gam
 #> 2 of 2 resampling: default_gam (1.4s)
@@ -405,7 +405,7 @@

 When only some splits fail
 )
 #> i No tuning parameters. `fit_resamples()` will be attempted
 #> i 1 of 3 resampling: default_glm
-#> 1 of 3 resampling: default_glm (188ms)
+#> 1 of 3 resampling: default_glm (206ms)
 #> i No tuning parameters. `fit_resamples()` will be attempted
 #> i 2 of 3 resampling: default_gam
 #> → A | warning: Fitting terminated with step failure - check results carefully
@@ -415,7 +415,7 @@

 When only some splits fail
 #> 2 of 3 resampling: default_gam (1.6s)
 #> i 3 of 3 tuning: default_rf
 #> i Creating pre-processing data to finalize unknown parameter: mtry
-#> 3 of 3 tuning: default_rf (473ms)
+#> 3 of 3 tuning: default_rf (475ms)

 We see that one of the folds gives us an error when using GAMs. The error (“Fitting terminated with step failure - check results carefully”) comes from the gam function in the package mgcv. A quick
diff --git a/dev/pkgdown.yml b/dev/pkgdown.yml
index f089f2d..7667cf4 100644
--- a/dev/pkgdown.yml
+++ b/dev/pkgdown.yml
@@ -6,7 +6,7 @@ articles:
   a1_palaeodata_application: a1_palaeodata_application.html
   a2_tidymodels_additions: a2_tidymodels_additions.html
   a3_troubleshooting: a3_troubleshooting.html
-last_built: 2024-10-13T13:58Z
+last_built: 2024-10-15T20:44Z
 urls:
   reference: https://evolecolgroup.github.io/tidysdm/reference
   article: https://evolecolgroup.github.io/tidysdm/articles
diff --git a/dev/search.json b/dev/search.json
index 2667454..bb50a64 100644
--- a/dev/search.json
+++ b/dev/search.json
@@ -1 +1 @@
-[… single-line auto-generated pkgdown search index (JSON entries duplicating the CODE_OF_CONDUCT, CONTRIBUTING guide, AGPL-3 LICENSE, and article/reference texts with stop words stripped); the replacement "+" line is truncated from this excerpt …]
changes go automatic continuous integration, check impact changes multiple platforms. everything goes well, see green tick submission.","code":"devtools::document() devtools::test() devtools::check()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/CONTRIBUTING.html","id":"fixing-typos","dir":"","previous_headings":"","what":"Fixing typos","title":"Contributing to tidysdm","text":"spot typos, spelling mistakes, grammatical errors documentation, fix directly file describes function. .R file R directory, .Rd file man directory. .Rd files automatically generated roxygen2 edited hand. recommend study first roxygen2 comments work.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/CONTRIBUTING.html","id":"functional-changes","dir":"","previous_headings":"","what":"Functional changes","title":"Contributing to tidysdm","text":"want make change impacts functioning tidysdm, ’s good idea first file issue explaining mind. change meant fix bug, add minimal reprex. good reprex also perfect starting point writing unit test, accompany functional change code. Unit tests also essential fixing bugs, can demonstrate fix work, prevent future changes undoing work. unit testing, use testthat; find tests tests, file dedicated function, following convention test_my_function.R naming files. creating tests, try make use built-datasets, rather adding data files package. Ideally, body Pull Request include phrase Fixes #issue-number, issue_number number Github. way, Pull Request automatically linked issue, issue closed Pull Request merged . user-facing changes, add bullet top NEWS.md (.e. just first header). Follow style described https://style.tidyverse.org/news.html. continuous integration checks Pull Request reduce test coverage.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/CONTRIBUTING.html","id":"code-style","dir":"","previous_headings":"Functional changes","what":"Code style","title":"Contributing to tidysdm","text":"New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. Lots commenting code helps mantainability; , doubt, always add explanation new code.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Contributing to tidysdm","text":"Please note tidyverse project released Contributor Code Conduct. contributing project agree abide terms.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"GNU Affero General Public License","title":"GNU Affero General Public License","text":"Version 3, 19 November 2007 Copyright (C) 2007 Free Software Foundation, Inc.  Everyone permitted copy distribute verbatim copies license document, changing allowed.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"preamble","dir":"","previous_headings":"","what":"Preamble","title":"GNU Affero General Public License","text":"GNU Affero General Public License free, copyleft license software kinds works, specifically designed ensure cooperation community case network server software. licenses software practical works designed take away freedom share change works. contrast, General Public Licenses intended guarantee freedom share change versions program–make sure remains free software users. speak free software, referring freedom, price. 
General Public Licenses designed make sure freedom distribute copies free software (charge wish), receive source code can get want , can change software use pieces new free programs, know can things. Developers use General Public Licenses protect rights two steps: (1) assert copyright software, (2) offer License gives legal permission copy, distribute /modify software. secondary benefit defending users’ freedom improvements made alternate versions program, receive widespread use, become available developers incorporate. Many developers free software heartened encouraged resulting cooperation. However, case software used network servers, result may fail come . GNU General Public License permits making modified version letting public access server without ever releasing source code public. GNU Affero General Public License designed specifically ensure , cases, modified source code becomes available community. requires operator network server provide source code modified version running users server. Therefore, public use modified version, publicly accessible server, gives public access source code modified version. older license, called Affero General Public License published Affero, designed accomplish similar goals. different license, version Affero GPL, Affero released new version Affero GPL permits relicensing license. precise terms conditions copying, distribution modification follow.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_0-definitions","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"0. Definitions.","title":"GNU Affero General Public License","text":"“License” refers version 3 GNU Affero General Public License. “Copyright” also means copyright-like laws apply kinds works, semiconductor masks. “Program” refers copyrightable work licensed License. licensee addressed “”. “Licensees” “recipients” may individuals organizations. “modify” work means copy adapt part work fashion requiring copyright permission, making exact copy. resulting work called “modified version” earlier work work “based ” earlier work. “covered work” means either unmodified Program work based Program. “propagate” work means anything , without permission, make directly secondarily liable infringement applicable copyright law, except executing computer modifying private copy. Propagation includes copying, distribution (without modification), making available public, countries activities well. “convey” work means kind propagation enables parties make receive copies. Mere interaction user computer network, transfer copy, conveying. interactive user interface displays “Appropriate Legal Notices” extent includes convenient prominently visible feature (1) displays appropriate copyright notice, (2) tells user warranty work (except extent warranties provided), licensees may convey work License, view copy License. interface presents list user commands options, menu, prominent item list meets criterion.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_1-source-code","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"1. Source Code.","title":"GNU Affero General Public License","text":"“source code” work means preferred form work making modifications . “Object code” means non-source form work. “Standard Interface” means interface either official standard defined recognized standards body, , case interfaces specified particular programming language, one widely used among developers working language. 
“System Libraries” executable work include anything, work whole, () included normal form packaging Major Component, part Major Component, (b) serves enable use work Major Component, implement Standard Interface implementation available public source code form. “Major Component”, context, means major essential component (kernel, window system, ) specific operating system () executable work runs, compiler used produce work, object code interpreter used run . “Corresponding Source” work object code form means source code needed generate, install, (executable work) run object code modify work, including scripts control activities. However, include work’s System Libraries, general-purpose tools generally available free programs used unmodified performing activities part work. example, Corresponding Source includes interface definition files associated source files work, source code shared libraries dynamically linked subprograms work specifically designed require, intimate data communication control flow subprograms parts work. Corresponding Source need include anything users can regenerate automatically parts Corresponding Source. Corresponding Source work source code form work.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_2-basic-permissions","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"2. Basic Permissions.","title":"GNU Affero General Public License","text":"rights granted License granted term copyright Program, irrevocable provided stated conditions met. License explicitly affirms unlimited permission run unmodified Program. output running covered work covered License output, given content, constitutes covered work. License acknowledges rights fair use equivalent, provided copyright law. may make, run propagate covered works convey, without conditions long license otherwise remains force. may convey covered works others sole purpose make modifications exclusively , provide facilities running works, provided comply terms License conveying material control copyright. thus making running covered works must exclusively behalf, direction control, terms prohibit making copies copyrighted material outside relationship . Conveying circumstances permitted solely conditions stated . Sublicensing allowed; section 10 makes unnecessary.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_3-protecting-users-legal-rights-from-anti-circumvention-law","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"3. Protecting Users’ Legal Rights From Anti-Circumvention Law.","title":"GNU Affero General Public License","text":"covered work shall deemed part effective technological measure applicable law fulfilling obligations article 11 WIPO copyright treaty adopted 20 December 1996, similar laws prohibiting restricting circumvention measures. convey covered work, waive legal power forbid circumvention technological measures extent circumvention effected exercising rights License respect covered work, disclaim intention limit operation modification work means enforcing, work’s users, third parties’ legal rights forbid circumvention technological measures.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_4-conveying-verbatim-copies","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"4. 
Conveying Verbatim Copies.","title":"GNU Affero General Public License","text":"may convey verbatim copies Program’s source code receive , medium, provided conspicuously appropriately publish copy appropriate copyright notice; keep intact notices stating License non-permissive terms added accord section 7 apply code; keep intact notices absence warranty; give recipients copy License along Program. may charge price price copy convey, may offer support warranty protection fee.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_5-conveying-modified-source-versions","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"5. Conveying Modified Source Versions.","title":"GNU Affero General Public License","text":"may convey work based Program, modifications produce Program, form source code terms section 4, provided also meet conditions: work must carry prominent notices stating modified , giving relevant date. work must carry prominent notices stating released License conditions added section 7. requirement modifies requirement section 4 “keep intact notices”. must license entire work, whole, License anyone comes possession copy. License therefore apply, along applicable section 7 additional terms, whole work, parts, regardless packaged. License gives permission license work way, invalidate permission separately received . work interactive user interfaces, must display Appropriate Legal Notices; however, Program interactive interfaces display Appropriate Legal Notices, work need make . compilation covered work separate independent works, nature extensions covered work, combined form larger program, volume storage distribution medium, called “aggregate” compilation resulting copyright used limit access legal rights compilation’s users beyond individual works permit. Inclusion covered work aggregate cause License apply parts aggregate.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_6-conveying-non-source-forms","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"6. Conveying Non-Source Forms.","title":"GNU Affero General Public License","text":"may convey covered work object code form terms sections 4 5, provided also convey machine-readable Corresponding Source terms License, one ways: Convey object code , embodied , physical product (including physical distribution medium), accompanied Corresponding Source fixed durable physical medium customarily used software interchange. Convey object code , embodied , physical product (including physical distribution medium), accompanied written offer, valid least three years valid long offer spare parts customer support product model, give anyone possesses object code either (1) copy Corresponding Source software product covered License, durable physical medium customarily used software interchange, price reasonable cost physically performing conveying source, (2) access copy Corresponding Source network server charge. Convey individual copies object code copy written offer provide Corresponding Source. alternative allowed occasionally noncommercially, received object code offer, accord subsection 6b. Convey object code offering access designated place (gratis charge), offer equivalent access Corresponding Source way place charge. need require recipients copy Corresponding Source along object code. 
place copy object code network server, Corresponding Source may different server (operated third party) supports equivalent copying facilities, provided maintain clear directions next object code saying find Corresponding Source. Regardless server hosts Corresponding Source, remain obligated ensure available long needed satisfy requirements. Convey object code using peer--peer transmission, provided inform peers object code Corresponding Source work offered general public charge subsection 6d. separable portion object code, whose source code excluded Corresponding Source System Library, need included conveying object code work. “User Product” either (1) “consumer product”, means tangible personal property normally used personal, family, household purposes, (2) anything designed sold incorporation dwelling. determining whether product consumer product, doubtful cases shall resolved favor coverage. particular product received particular user, “normally used” refers typical common use class product, regardless status particular user way particular user actually uses, expects expected use, product. product consumer product regardless whether product substantial commercial, industrial non-consumer uses, unless uses represent significant mode use product. “Installation Information” User Product means methods, procedures, authorization keys, information required install execute modified versions covered work User Product modified version Corresponding Source. information must suffice ensure continued functioning modified object code case prevented interfered solely modification made. convey object code work section , , specifically use , User Product, conveying occurs part transaction right possession use User Product transferred recipient perpetuity fixed term (regardless transaction characterized), Corresponding Source conveyed section must accompanied Installation Information. requirement apply neither third party retains ability install modified object code User Product (example, work installed ROM). requirement provide Installation Information include requirement continue provide support service, warranty, updates work modified installed recipient, User Product modified installed. Access network may denied modification materially adversely affects operation network violates rules protocols communication across network. Corresponding Source conveyed, Installation Information provided, accord section must format publicly documented (implementation available public source code form), must require special password key unpacking, reading copying.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_7-additional-terms","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"7. Additional Terms.","title":"GNU Affero General Public License","text":"“Additional permissions” terms supplement terms License making exceptions one conditions. Additional permissions applicable entire Program shall treated though included License, extent valid applicable law. additional permissions apply part Program, part may used separately permissions, entire Program remains governed License without regard additional permissions. convey copy covered work, may option remove additional permissions copy, part . (Additional permissions may written require removal certain cases modify work.) may place additional permissions material, added covered work, can give appropriate copyright permission. 
Notwithstanding provision License, material add covered work, may (authorized copyright holders material) supplement terms License terms: Disclaiming warranty limiting liability differently terms sections 15 16 License; Requiring preservation specified reasonable legal notices author attributions material Appropriate Legal Notices displayed works containing ; Prohibiting misrepresentation origin material, requiring modified versions material marked reasonable ways different original version; Limiting use publicity purposes names licensors authors material; Declining grant rights trademark law use trade names, trademarks, service marks; Requiring indemnification licensors authors material anyone conveys material (modified versions ) contractual assumptions liability recipient, liability contractual assumptions directly impose licensors authors. non-permissive additional terms considered “restrictions” within meaning section 10. Program received , part , contains notice stating governed License along term restriction, may remove term. license document contains restriction permits relicensing conveying License, may add covered work material governed terms license document, provided restriction survive relicensing conveying. add terms covered work accord section, must place, relevant source files, statement additional terms apply files, notice indicating find applicable terms. Additional terms, permissive non-permissive, may stated form separately written license, stated exceptions; requirements apply either way.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_8-termination","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"8. Termination.","title":"GNU Affero General Public License","text":"may propagate modify covered work except expressly provided License. attempt otherwise propagate modify void, automatically terminate rights License (including patent licenses granted third paragraph section 11). However, cease violation License, license particular copyright holder reinstated () provisionally, unless copyright holder explicitly finally terminates license, (b) permanently, copyright holder fails notify violation reasonable means prior 60 days cessation. Moreover, license particular copyright holder reinstated permanently copyright holder notifies violation reasonable means, first time received notice violation License (work) copyright holder, cure violation prior 30 days receipt notice. Termination rights section terminate licenses parties received copies rights License. rights terminated permanently reinstated, qualify receive new licenses material section 10.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_9-acceptance-not-required-for-having-copies","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"9. Acceptance Not Required for Having Copies.","title":"GNU Affero General Public License","text":"required accept License order receive run copy Program. Ancillary propagation covered work occurring solely consequence using peer--peer transmission receive copy likewise require acceptance. However, nothing License grants permission propagate modify covered work. actions infringe copyright accept License. Therefore, modifying propagating covered work, indicate acceptance License .","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_10-automatic-licensing-of-downstream-recipients","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"10. 
Automatic Licensing of Downstream Recipients.","title":"GNU Affero General Public License","text":"time convey covered work, recipient automatically receives license original licensors, run, modify propagate work, subject License. responsible enforcing compliance third parties License. “entity transaction” transaction transferring control organization, substantially assets one, subdividing organization, merging organizations. propagation covered work results entity transaction, party transaction receives copy work also receives whatever licenses work party’s predecessor interest give previous paragraph, plus right possession Corresponding Source work predecessor interest, predecessor can get reasonable efforts. may impose restrictions exercise rights granted affirmed License. example, may impose license fee, royalty, charge exercise rights granted License, may initiate litigation (including cross-claim counterclaim lawsuit) alleging patent claim infringed making, using, selling, offering sale, importing Program portion .","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_11-patents","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"11. Patents.","title":"GNU Affero General Public License","text":"“contributor” copyright holder authorizes use License Program work Program based. work thus licensed called contributor’s “contributor version”. contributor’s “essential patent claims” patent claims owned controlled contributor, whether already acquired hereafter acquired, infringed manner, permitted License, making, using, selling contributor version, include claims infringed consequence modification contributor version. purposes definition, “control” includes right grant patent sublicenses manner consistent requirements License. contributor grants non-exclusive, worldwide, royalty-free patent license contributor’s essential patent claims, make, use, sell, offer sale, import otherwise run, modify propagate contents contributor version. following three paragraphs, “patent license” express agreement commitment, however denominated, enforce patent (express permission practice patent covenant sue patent infringement). “grant” patent license party means make agreement commitment enforce patent party. convey covered work, knowingly relying patent license, Corresponding Source work available anyone copy, free charge terms License, publicly available network server readily accessible means, must either (1) cause Corresponding Source available, (2) arrange deprive benefit patent license particular work, (3) arrange, manner consistent requirements License, extend patent license downstream recipients. “Knowingly relying” means actual knowledge , patent license, conveying covered work country, recipient’s use covered work country, infringe one identifiable patents country reason believe valid. , pursuant connection single transaction arrangement, convey, propagate procuring conveyance , covered work, grant patent license parties receiving covered work authorizing use, propagate, modify convey specific copy covered work, patent license grant automatically extended recipients covered work works based . patent license “discriminatory” include within scope coverage, prohibits exercise , conditioned non-exercise one rights specifically granted License. 
may convey covered work party arrangement third party business distributing software, make payment third party based extent activity conveying work, third party grants, parties receive covered work , discriminatory patent license () connection copies covered work conveyed (copies made copies), (b) primarily connection specific products compilations contain covered work, unless entered arrangement, patent license granted, prior 28 March 2007. Nothing License shall construed excluding limiting implied license defenses infringement may otherwise available applicable patent law.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_12-no-surrender-of-others-freedom","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"12. No Surrender of Others’ Freedom.","title":"GNU Affero General Public License","text":"conditions imposed (whether court order, agreement otherwise) contradict conditions License, excuse conditions License. convey covered work satisfy simultaneously obligations License pertinent obligations, consequence may convey . example, agree terms obligate collect royalty conveying convey Program, way satisfy terms License refrain entirely conveying Program.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_13-remote-network-interaction-use-with-the-gnu-general-public-license","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"13. Remote Network Interaction; Use with the GNU General Public License.","title":"GNU Affero General Public License","text":"Notwithstanding provision License, modify Program, modified version must prominently offer users interacting remotely computer network (version supports interaction) opportunity receive Corresponding Source version providing access Corresponding Source network server charge, standard customary means facilitating copying software. Corresponding Source shall include Corresponding Source work covered version 3 GNU General Public License incorporated pursuant following paragraph. Notwithstanding provision License, permission link combine covered work work licensed version 3 GNU General Public License single combined work, convey resulting work. terms License continue apply part covered work, work combined remain governed version 3 GNU General Public License.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_14-revised-versions-of-this-license","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"14. Revised Versions of this License.","title":"GNU Affero General Public License","text":"Free Software Foundation may publish revised /new versions GNU Affero General Public License time time. new versions similar spirit present version, may differ detail address new problems concerns. version given distinguishing version number. Program specifies certain numbered version GNU Affero General Public License “later version” applies , option following terms conditions either numbered version later version published Free Software Foundation. Program specify version number GNU Affero General Public License, may choose version ever published Free Software Foundation. Program specifies proxy can decide future versions GNU Affero General Public License can used, proxy’s public statement acceptance version permanently authorizes choose version Program. Later license versions may give additional different permissions. 
However, additional obligations imposed author copyright holder result choosing follow later version.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_15-disclaimer-of-warranty","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"15. Disclaimer of Warranty.","title":"GNU Affero General Public License","text":"WARRANTY PROGRAM, EXTENT PERMITTED APPLICABLE LAW. EXCEPT OTHERWISE STATED WRITING COPYRIGHT HOLDERS /PARTIES PROVIDE PROGRAM “” WITHOUT WARRANTY KIND, EITHER EXPRESSED IMPLIED, INCLUDING, LIMITED , IMPLIED WARRANTIES MERCHANTABILITY FITNESS PARTICULAR PURPOSE. ENTIRE RISK QUALITY PERFORMANCE PROGRAM . PROGRAM PROVE DEFECTIVE, ASSUME COST NECESSARY SERVICING, REPAIR CORRECTION.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_16-limitation-of-liability","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"16. Limitation of Liability.","title":"GNU Affero General Public License","text":"EVENT UNLESS REQUIRED APPLICABLE LAW AGREED WRITING COPYRIGHT HOLDER, PARTY MODIFIES /CONVEYS PROGRAM PERMITTED , LIABLE DAMAGES, INCLUDING GENERAL, SPECIAL, INCIDENTAL CONSEQUENTIAL DAMAGES ARISING USE INABILITY USE PROGRAM (INCLUDING LIMITED LOSS DATA DATA RENDERED INACCURATE LOSSES SUSTAINED THIRD PARTIES FAILURE PROGRAM OPERATE PROGRAMS), EVEN HOLDER PARTY ADVISED POSSIBILITY DAMAGES.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_17-interpretation-of-sections-15-and-16","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"17. Interpretation of Sections 15 and 16.","title":"GNU Affero General Public License","text":"disclaimer warranty limitation liability provided given local legal effect according terms, reviewing courts shall apply local law closely approximates absolute waiver civil liability connection Program, unless warranty assumption liability accompanies copy Program return fee. END TERMS CONDITIONS","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"how-to-apply-these-terms-to-your-new-programs","dir":"","previous_headings":"","what":"How to Apply These Terms to Your New Programs","title":"GNU Affero General Public License","text":"develop new program, want greatest possible use public, best way achieve make free software everyone can redistribute change terms. , attach following notices program. safest attach start source file effectively state exclusion warranty; file least “copyright” line pointer full notice found. Also add information contact electronic paper mail. software can interact users remotely computer network, also make sure provides way users get source. example, program web application, interface display “Source” link leads users archive code. many ways offer source, different solutions better different programs; see section 13 specific requirements. also get employer (work programmer) school, , sign “copyright disclaimer” program, necessary. information , apply follow GNU AGPL, see https://www.gnu.org/licenses/.","code":" Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see ."},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"sdms-with-tidymodels","dir":"Articles","previous_headings":"","what":"SDMs with tidymodels","title":"tidysdm overview","text":"Species Distribution Modelling relies several algorithms, many number hyperparameters require turning. tidymodels universe includes number packages specifically design fit, tune validate models. advantage tidymodels models syntax results returned users standardised, thus providing coherent interface modelling. Given variety models required SDM, tidymodels ideal framework. tidysdm provides number wrappers specialised functions facilitate fitting SDM tidymodels. article provides overview tidysdm facilitates fitting SDMs. articles, detailing use package palaeodata, fitting complex models troubleshoot models can found tidisdm website. tidysdm relies tidymodels, users advised familiarise introductory tutorials tidymodels website. load tidysdm, automatically loads tidymodels associated packages necessary fit models:","code":"library(tidysdm) #> Loading required package: tidymodels #> ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ── #> ✔ broom 1.0.7 ✔ recipes 1.1.0 #> ✔ dials 1.3.0 ✔ rsample 1.2.1 #> ✔ dplyr 1.1.4 ✔ tibble 3.2.1 #> ✔ ggplot2 3.5.1 ✔ tidyr 1.3.1 #> ✔ infer 1.0.7 ✔ tune 1.2.1 #> ✔ modeldata 1.4.0 ✔ workflows 1.1.4 #> ✔ parsnip 1.2.1 ✔ workflowsets 1.1.0 #> ✔ purrr 1.0.2 ✔ yardstick 1.3.1 #> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ── #> ✖ purrr::discard() masks scales::discard() #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() #> ✖ recipes::step() masks stats::step() #> • Dig deeper into tidy modeling with R at https://www.tmwr.org #> Loading required package: spatialsample"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"accessing-the-data-for-this-vignette-how-to-use-rgbif","dir":"Articles","previous_headings":"SDMs with tidymodels","what":"Accessing the data for this vignette: how to use rgbif","title":"tidysdm overview","text":"start reading set presences species lizard inhabits Iberian peninsula, Lacerta schreiberi. data taken GBIF Occurrence Download (6 July 2023) https://doi.org/10.15468/dl.srq3b3. dataset already included tidysdm package: Alternatively, can easily access manipulate dataset using rbgif. Note data GBIF often requires level cleaning. 
use simple cleaning function CoordinateCleaner; general, recommend inspect data flagged problematic, rather just accepting :","code":"data(lacerta) head(lacerta) #> # A tibble: 6 × 3 #> ID latitude longitude #> #> 1 858029749 42.6 -7.09 #> 2 858029738 42.6 -7.09 #> 3 614631090 41.4 -7.90 #> 4 614631085 41.3 -7.81 #> 5 614631083 41.3 -7.81 #> 6 614631080 41.4 -7.83 # download presences library(rgbif) occ_download_get(key = \"0068808-230530130749713\", path = tempdir()) # read file library(readr) distrib <- read_delim(file.path(tempdir(), \"0068808-230530130749713.zip\")) # keep the necessary columns and rename them lacerta <- distrib %>% select(gbifID, decimalLatitude, decimalLongitude) %>% rename(ID = gbifID, latitude = decimalLatitude, longitude = decimalLongitude) # clean up the data library(CoordinateCleaner) lacerta <- clean_coordinates(x = lacerta, lon = \"longitude\", lat = \"latitude\", species = \"ID\", value = \"clean\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"preparing-your-data","dir":"Articles","previous_headings":"","what":"Preparing your data","title":"tidysdm overview","text":"First, let us visualise presences plotting map. tidysdm works sf objects represent locations, cast coordinates sf object, set projections standard ‘lonlat’ (crs = 4326). usually advisable plot locations directly raster used extract climatic variables, see locations fall within discrete space raster. vignette, use WorldClim source climatic information. access WorldClim data via library pastclim; even though library, name suggests, mostly designed handle palaeoclimatic reconstructions, also provides convenient functions access present day reconstructions future projections. pastclim handy function get land mask available datasets, can use background locations. cut raster Iberian peninsula, lizard lives. 
simply illustration, bother project raster, equal area projection desirable… plotting, take advantage tidyterra, makes handling terra rasters ggplot breeze.","code":"library(sf) #> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE lacerta <- st_as_sf(lacerta, coords = c(\"longitude\", \"latitude\")) st_crs(lacerta) <- 4326 library(pastclim) download_dataset(dataset = \"WorldClim_2.1_10m\") land_mask <- get_land_mask(time_ce = 1985, dataset = \"WorldClim_2.1_10m\") # Iberia peninsula extension iberia_poly <- terra::vect( \"POLYGON((-9.8 43.3,-7.8 44.1,-2.0 43.7,3.6 42.5,3.8 41.5,1.3 40.8,0.3 39.5, 0.9 38.6,-0.4 37.5,-1.6 36.7,-2.3 36.3,-4.1 36.4,-4.5 36.4,-5.0 36.1, -5.6 36.0,-6.3 36.0,-7.1 36.9,-9.5 36.6,-9.4 38.0,-10.6 38.9,-9.5 40.8, -9.8 43.3))\" ) crs(iberia_poly) <- \"lonlat\" # crop the extent land_mask <- crop(land_mask, iberia_poly) # and mask to the polygon land_mask <- mask(land_mask, iberia_poly) #> Loading required package: terra #> terra 1.7.78 #> #> Attaching package: 'terra' #> The following object is masked from 'package:tidyr': #> #> extract #> The following object is masked from 'package:scales': #> #> rescale #> [1] TRUE library(tidyterra) #> #> Attaching package: 'tidyterra' #> The following object is masked from 'package:stats': #> #> filter library(ggplot2) ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_1985)) + geom_sf(data = lacerta) + guides(fill=\"none\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"thinning-step","dir":"Articles","previous_headings":"","what":"Thinning step","title":"tidysdm overview","text":"Now, thin observations one per cell raster (better equal area projection…): Now, thin remove points closer 20km. However, note standard map units ‘lonlat’ projection meters. tidysdm provides convening conversion function, km2m(), avoid write lots zeroes): Let’s see left points: now need select points represent potential available area species. two approaches, can either sample background sample_background(), can generate pseudo-absences sample_pseudoabs(). example, sample background; specifically, attempt account potential sampling biases using target group approach, presences species within taxonomic group used condition sampling background, providing information differential sampling different areas within region interest. start downloading records 8 genera Lacertidae, covering geographic region Iberian peninsula GBIF https://doi.org/10.15468/dl.53js5z: need convert observations raster whose values number records (later used determine likely cell used background point): can see sampling far random, certain locations large number records. can now sample background, using ‘bias’ method represent heterogeneity sampling effort: Let’s see presences background: can use pastclim download WorldClim dataset (’ll use 10 arc-minute resolution) extract bioclimatic variables available (use pastclim, use raster dataset access , loading directly terra). Note dataset covers period 1970-2000, pastclim dates 1985 (midpoint). also cropped directly Iberian peninsula. Note , vignette, focus continuous variables; machine learning algorithms natively cope multi-level factors, possible use recipes::step_dummy() generate dummy variables factors. worked example can found article additional features tidymodels tidysdm. Next, extract climate presences background points: Based paper (https://doi.org/10.1007/s10531-010-9865-2), interested variables: “bio06”, “bio05”, “bio13”, “bio14”, “bio15”. 
can visualise differences presences background using violin plots: can see variables interest seem different distribution presences background. can formally quantify mismatch two computing overlap: , can see variables interest seem good candidates clear signal. Let us focus variables: Environmental variables often highly correlated, collinearity issue several types models. can inspect correlation among variables : can see variables rather high correlation (e.g. bio05 vs bio14). can subset variables certain threshold correlation (e.g. 0.7) : , removing bio14 leaves us set uncorrelated variables. Note filter_collinear methods based variable inflation also worth exploring. example, remove bio14 work remaining variables.","code":"set.seed(1234567) lacerta <- thin_by_cell(lacerta, raster = land_mask) nrow(lacerta) #> [1] 226 ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_1985)) + geom_sf(data = lacerta) + guides(fill=\"none\") set.seed(1234567) lacerta_thin <- thin_by_dist(lacerta, dist_min = km2m(20)) nrow(lacerta_thin) #> [1] 111 ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_1985)) + geom_sf(data = lacerta_thin) + guides(fill=\"none\") library(rgbif) occ_download_get(key = \"0121761-240321170329656\", path = tempdir()) library(readr) backg_distrib <- readr::read_delim(file.path(tempdir(), \"0121761-240321170329656.zip\")) # keep the necessary columns lacertidae_background <- backg_distrib %>% select(gbifID, decimalLatitude, decimalLongitude) %>% rename(ID = gbifID, latitude = decimalLatitude, longitude = decimalLongitude) # convert to an sf object lacertidae_background <- st_as_sf(lacertidae_background, coords = c(\"longitude\", \"latitude\")) st_crs(lacertidae_background) <- 4326 lacertidae_background_raster <- rasterize(lacertidae_background, land_mask, fun = \"count\") plot(lacertidae_background_raster) set.seed(1234567) lacerta_thin <- sample_background(data = lacerta_thin, raster = lacertidae_background_raster, n = 3 * nrow(lacerta_thin), method = \"bias\", class_label = \"background\", return_pres = TRUE) ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_1985)) + geom_sf(data = lacerta_thin, aes(col = class)) + guides(fill=\"none\") download_dataset(\"WorldClim_2.1_10m\") climate_vars <- get_vars_for_dataset(\"WorldClim_2.1_10m\") climate_present <- pastclim::region_slice( time_ce = 1985, bio_variables = climate_vars, data = \"WorldClim_2.1_10m\", crop = iberia_poly ) lacerta_thin <- lacerta_thin %>% bind_cols(terra::extract(climate_present, lacerta_thin, ID = FALSE)) lacerta_thin %>% plot_pres_vs_bg(class) lacerta_thin %>% dist_pres_vs_bg(class) #> bio09 bio12 bio16 bio19 bio13 bio05 bio10 #> 0.43907819 0.41888524 0.41487381 0.40742724 0.40492411 0.38854703 0.38610145 #> bio02 bio07 bio04 bio08 bio17 bio15 bio18 #> 0.35191109 0.35036167 0.32450555 0.31879785 0.28143659 0.27152095 0.25007068 #> bio01 bio14 bio03 bio11 altitude bio06 #> 0.24589097 0.24294699 0.18414624 0.11169528 0.07271380 0.06742951 suggested_vars <- c(\"bio06\", \"bio05\", \"bio13\", \"bio14\", \"bio15\") pairs(climate_present[[suggested_vars]]) climate_present <- climate_present[[suggested_vars]] vars_uncor <- filter_collinear(climate_present, cutoff = 0.7, method = \"cor_caret\") vars_uncor #> [1] \"bio15\" \"bio05\" \"bio13\" \"bio06\" #> attr(,\"to_remove\") #> [1] \"bio14\" lacerta_thin <- lacerta_thin %>% select(all_of(c(vars_uncor, \"class\"))) climate_present <- climate_present[[vars_uncor]] names(climate_present) # added to highlight which variables are 
retained in the end #> [1] \"bio15\" \"bio05\" \"bio13\" \"bio06\""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"fit-the-model-by-cross-validation","dir":"Articles","previous_headings":"","what":"Fit the model by cross-validation","title":"tidysdm overview","text":"Next, need set recipe define handle dataset. don’t want anything data terms transformations, just need define formula (class outcome, variables predictors; note , sf objects, geometry automatically replaced X Y columns assigned role coords, thus used predictors): classification models tidymodels, assumption level interest response (case, presences) reference level. can confirm data correctly formatted : now build workflow_set different models, defining hyperparameters want tune. use glm, random forest, boosted_trees maxent models (details use workflow_sets, see tutorial). latter three models tunable hyperparameters. commonly used models, tidysdm automatically chooses important parameters, possible fully customise model specifications (e.g. see help sdm_spec_rf). Note , used GAMs sdm_spec_gam(), necessary update model gam_formula() due non-standard formula notation GAMs (see help sdm_spec_gam() example ). now want set spatial block cross-validation scheme tune assess models. split data creating 3 folds. use spatial_block_cv function package spatialsample. spatialsample offers number sampling approaches spatial data; also possible convert objects created blockCV (offers features spatial sampling, stratified sampling) rsample object suitable tisysdm function blockcv2rsample. can now use block CV folds tune assess models (keep computations fast, explore 3 combination hyperparameters per model; far little real life!): Note workflow_set correctly detects tuning parameters glm. can look performance models : Now let’s create ensemble, selecting best set parameters model (really relevant random forest, hype-parameters tune glm gam). use Boyce continuous index metric choose best random forest boosted tree. adding members ensemble, automatically fitted full training dataset, ready make predictions. visualise tabular form model metrics can obtained :","code":"lacerta_rec <- recipe(lacerta_thin, formula = class ~ .) lacerta_rec #> #> ── Recipe ────────────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 4 #> coords: 2 lacerta_thin %>% check_sdm_presence(class) #> [1] TRUE lacerta_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_rec), models = list( # the standard glm specs glm = sdm_spec_glm(), # rf specs with tuning rf = sdm_spec_rf(), # boosted tree model (gbm) specs with tuning gbm = sdm_spec_boost_tree(), # maxent specs with tuning maxent = sdm_spec_maxent() ), # make all combinations of preproc and models, cross = TRUE ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) library(tidysdm) set.seed(100) #lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5) lacerta_cv <- spatial_block_cv(data = lacerta_thin, v = 3, n = 5) autoplot(lacerta_cv) set.seed(1234567) lacerta_models <- lacerta_models %>% workflow_map(\"tune_grid\", resamples = lacerta_cv, grid = 3, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. 
`fit_resamples()` will be attempted #> i 1 of 4 resampling: default_glm #> ✔ 1 of 4 resampling: default_glm (193ms) #> i 2 of 4 tuning: default_rf #> i Creating pre-processing data to finalize unknown parameter: mtry #> ✔ 2 of 4 tuning: default_rf (828ms) #> i 3 of 4 tuning: default_gbm #> i Creating pre-processing data to finalize unknown parameter: mtry #> ✔ 3 of 4 tuning: default_gbm (3.9s) #> i 4 of 4 tuning: default_maxent #> ✔ 4 of 4 tuning: default_maxent (1.2s) autoplot(lacerta_models) lacerta_ensemble <- simple_ensemble() %>% add_member(lacerta_models, metric = \"boyce_cont\") lacerta_ensemble #> A simple_ensemble of models #> #> Members: #> • default_glm #> • default_rf #> • default_gbm #> • default_maxent #> #> Available metrics: #> • boyce_cont #> • roc_auc #> • tss_max #> #> Metric used to tune workflows: #> • boyce_cont autoplot(lacerta_ensemble) lacerta_ensemble %>% collect_metrics() #> # A tibble: 12 × 5 #> wflow_id .metric mean std_err n #> #> 1 default_glm boyce_cont 0.573 0.115 3 #> 2 default_glm roc_auc 0.775 0.0138 3 #> 3 default_glm tss_max 0.486 0.0337 3 #> 4 default_rf boyce_cont 0.709 0.0856 3 #> 5 default_rf roc_auc 0.794 0.00648 3 #> 6 default_rf tss_max 0.537 0.0363 3 #> 7 default_gbm boyce_cont 0.659 0.0472 3 #> 8 default_gbm roc_auc 0.789 0.00707 3 #> 9 default_gbm tss_max 0.524 0.0152 3 #> 10 default_maxent boyce_cont 0.651 0.157 3 #> 11 default_maxent roc_auc 0.804 0.00653 3 #> 12 default_maxent tss_max 0.572 0.0111 3"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"projecting-to-the-present","dir":"Articles","previous_headings":"","what":"Projecting to the present","title":"tidysdm overview","text":"We can now make predictions with the ensemble (using the default option of taking the mean of the predictions from each model). We can also subset the ensemble to only use the best models, based on the Boyce continuous index, by setting a minimum threshold of 0.7 for that metric. We will also take the median of the available model predictions (instead of the mean, which is the default). The plot does not change much (the models are quite consistent). Sometimes it is desirable to have binary predictions (presence vs absence), rather than the probability of occurrence.
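As a quick aside (and not a substitute for the calibrated approach described next), a probability raster can be binarised with plain terra arithmetic, using the prediction_present raster produced by predict_raster below; the 0.5 cutoff here is purely illustrative:

# illustrative only: binarise the mean ensemble prediction at an arbitrary 0.5 cutoff
prediction_naive_binary <- prediction_present$mean >= 0.5
plot(prediction_naive_binary)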
To do so, we first need to calibrate the threshold used to convert probabilities into classes (in this case, we optimise the TSS): And now we can predict for the whole continent:","code":"prediction_present <- predict_raster(lacerta_ensemble, climate_present) ggplot() + geom_spatraster(data = prediction_present, aes(fill = mean)) + scale_fill_terrain_c() + # plot presences used in the model geom_sf(data = lacerta_thin %>% filter(class == \"presence\")) prediction_present_boyce <- predict_raster(lacerta_ensemble, climate_present, metric_thresh = c(\"boyce_cont\", 0.7), fun = \"median\" ) ggplot() + geom_spatraster(data = prediction_present_boyce, aes(fill = median)) + scale_fill_terrain_c() + geom_sf(data = lacerta_thin %>% filter(class == \"presence\")) lacerta_ensemble <- calib_class_thresh(lacerta_ensemble, class_thresh = \"tss_max\", metric_thresh = c(\"boyce_cont\", 0.7) ) prediction_present_binary <- predict_raster(lacerta_ensemble, climate_present, type = \"class\", class_thresh = c(\"tss_max\"), metric_thresh = c(\"boyce_cont\", 0.7) ) ggplot() + geom_spatraster(data = prediction_present_binary, aes(fill = binary_mean)) + geom_sf(data = lacerta_thin %>% filter(class == \"presence\")) + scale_fill_discrete(na.value = \"transparent\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"projecting-to-the-future","dir":"Articles","previous_headings":"","what":"Projecting to the future","title":"tidysdm overview","text":"WorldClim has a wide selection of projections into the future based on different models and Shared Socio-economic Pathways (SSP). Type help(\"WorldClim_2.1\") for a full list. We will use the predictions based on the \"HadGEM3-GC31-LL\" model for SSP 245 (intermediate greenhouse gas emissions), at the same resolution as the present-day data (10 arc-minutes). We first download the data: Let's see what times are available: We will predict for 2090, the most distant prediction into the future that is available. Let's now check the available variables: Note that the future predictions do not include altitude (as it does not change with time); if it were needed, we could copy it over from the present. However, as it is not in our set of uncorrelated variables used earlier, we don't need to worry about it. We can then predict using the ensemble:","code":"download_dataset(\"WorldClim_2.1_HadGEM3-GC31-LL_ssp245_10m\") get_time_ce_steps(\"WorldClim_2.1_HadGEM3-GC31-LL_ssp245_10m\") #> [1] 2030 2050 2070 2090 get_vars_for_dataset(\"WorldClim_2.1_HadGEM3-GC31-LL_ssp245_10m\") #> [1] \"bio01\" \"bio02\" \"bio03\" \"bio04\" \"bio05\" \"bio06\" \"bio07\" \"bio08\" \"bio09\" #> [10] \"bio10\" \"bio11\" \"bio12\" \"bio13\" \"bio14\" \"bio15\" \"bio16\" \"bio17\" \"bio18\" #> [19] \"bio19\" climate_future <- pastclim::region_slice( time_ce = 2090, bio_variables = vars_uncor, data = \"WorldClim_2.1_HadGEM3-GC31-LL_ssp245_10m\", crop = iberia_poly ) prediction_future <- predict_raster(lacerta_ensemble, climate_future) ggplot() + geom_spatraster(data = prediction_future, aes(fill = mean)) + scale_fill_terrain_c()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"dealing-with-extrapolation","dir":"Articles","previous_headings":"","what":"Dealing with extrapolation","title":"tidysdm overview","text":"The total area of projection of the model may include environmental conditions that lie outside the range of conditions covered by the calibration dataset. This phenomenon can lead to misinterpretation of the SDM outcomes due to spatial extrapolation. tidysdm offers a couple of approaches to deal with this problem. The simplest is that we can clamp the environmental variables to stay within the limits observed in the calibration set: The predictions seem to have changed little. An alternative is to allow values to exceed the ranges of the calibration set, but to compute the Multivariate Environmental Similarity Surface (MESS) (Elith et al. 2010) to highlight the areas where extrapolation occurs and thus visualise the prediction's uncertainty. Here we estimate the MESS for the future time slice used above: Extrapolation occurs in the areas where MESS values are negative, with the magnitude of the negative values indicating how extreme the extrapolation is. In this plot, we can see that the area of extrapolation is where the model already predicted a suitability of zero, which explains why clamping changed the predictions so little. We can now overlay the MESS values on the current prediction to visualise the areas characterised by spatial extrapolation. Note that clamping and MESS are not only useful when making predictions into the future, but also into the past and present (in the latter case, they allow us to make sure that the background/pseudoabsences cover the full range of predictor variables in the area of interest). The tidymodels universe also includes functions to estimate the area of applicability in the package waywiser, which can be used with tidysdm.","code":"climate_future_clamped <- clamp_predictors(climate_future, training = lacerta_thin, .col= class) prediction_future_clamped <- predict_raster(lacerta_ensemble, raster = climate_future_clamped) ggplot() + geom_spatraster(data = prediction_future_clamped, aes(fill = mean)) + scale_fill_terrain_c() lacerta_mess_future <- extrapol_mess(x = climate_future, training = lacerta_thin, .col = \"class\") ggplot() + geom_spatraster(data = lacerta_mess_future) + scale_fill_viridis_b(na.value = \"transparent\") # subset mess lacerta_mess_future_subset <- lacerta_mess_future lacerta_mess_future_subset[lacerta_mess_future_subset >= 0] <- NA lacerta_mess_future_subset[lacerta_mess_future_subset < 0] <- 1 # convert into polygon lacerta_mess_future_subset <- as.polygons(lacerta_mess_future_subset) # plot as a mask ggplot() + geom_spatraster(data = prediction_future) + scale_fill_viridis_b(na.value = \"transparent\") + geom_sf(data = lacerta_mess_future_subset, fill= \"lightgray\", alpha = 0.5, linewidth = 0.5)"},
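To put a number on the extent of extrapolation, one could also compute the fraction of cells flagged as extrapolation; this is an illustrative sketch using plain terra, assuming the lacerta_mess_future raster created above:

# illustrative: proportion of (non-NA) cells with negative MESS values
terra::global(lacerta_mess_future < 0, fun = "mean", na.rm = TRUE)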
{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"visualising-the-contribution-of-individual-variables","dir":"Articles","previous_headings":"","what":"Visualising the contribution of individual variables","title":"tidysdm overview","text":"It is sometimes of interest to understand the relative contribution of individual variables to the prediction. This is a complex task, especially if there are interactions among variables. For simpler, linear models, it is possible to obtain marginal response curves (which show the effect of a variable whilst keeping all other variables at their mean) using step_profile() from the recipes package. We use step_profile() to define a new recipe, which we can then bake to generate the appropriate dataset to make the marginal prediction. We can then plot the predictions against the values of the variable of interest. For example, to investigate the contribution of bio05, we would use: It is also possible to use DALEX to explore tidysdm models; see the details in the tidymodels additions article.","code":"bio05_prof <- lacerta_rec %>% step_profile(-bio05, profile = vars(bio05)) %>% prep(training = lacerta_thin) bio05_data <- bake(bio05_prof, new_data = NULL) bio05_data <- bio05_data %>% mutate( pred = predict(lacerta_ensemble, bio05_data)$mean ) ggplot(bio05_data, aes(x = bio05, y = pred)) + geom_point(alpha = .5, cex = 1)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"repeated-ensembles","dir":"Articles","previous_headings":"","what":"Repeated ensembles","title":"tidysdm overview","text":"The steps of thinning and sampling pseudo-absences can have a bit of an impact on the performance of SDMs. As these steps are stochastic, it is good practice to explore their effect by repeating them, and then creating ensembles of models over the repeats. In tidysdm, it is possible to create repeat_ensembles. We start by creating a list of simple_ensembles, by looping through the SDM pipeline. We will just use two fast models to speed up the process.
Now we can create a repeat_ensemble from the list: We can summarise the goodness of fit of the models for each repeat with collect_metrics(), but there is no autoplot() function for repeated_ensemble objects. We can then predict in the usual way (here we take both the mean and the median of all models):","code":"# empty object to store the simple ensembles that we will create ensemble_list <- list() set.seed(123) # make sure you set the seed OUTSIDE the loop for (i_repeat in 1:3) { # thin the data lacerta_thin_rep <- thin_by_cell(lacerta, raster = climate_present) lacerta_thin_rep <- thin_by_dist(lacerta_thin_rep, dist_min = 20000) # sample pseudo-absences lacerta_thin_rep <- sample_pseudoabs(lacerta_thin_rep, n = 3 * nrow(lacerta_thin_rep), raster = climate_present, method = c(\"dist_min\", 50000) ) # get climate lacerta_thin_rep <- lacerta_thin_rep %>% bind_cols(terra::extract(climate_present, lacerta_thin_rep, ID = FALSE)) # create folds lacerta_thin_rep_cv <- spatial_block_cv(lacerta_thin_rep, v = 5) # create a recipe lacerta_thin_rep_rec <- recipe(lacerta_thin_rep, formula = class ~ .) # create a workflow_set lacerta_thin_rep_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_thin_rep_rec), models = list( # the standard glm specs glm = sdm_spec_glm(), # maxent specs with tuning maxent = sdm_spec_maxent() ), # make all combinations of preproc and models, cross = TRUE ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) # train the model lacerta_thin_rep_models <- lacerta_thin_rep_models %>% workflow_map(\"tune_grid\", resamples = lacerta_thin_rep_cv, grid = 10, metrics = sdm_metric_set(), verbose = TRUE ) # make a simple ensemble and add it to the list ensemble_list[[i_repeat]] <- simple_ensemble() %>% add_member(lacerta_thin_rep_models, metric = \"boyce_cont\") } #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> ✔ 1 of 2 resampling: default_glm (228ms) #> i 2 of 2 tuning: default_maxent #> ✔ 2 of 2 tuning: default_maxent (6.8s) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> ✔ 1 of 2 resampling: default_glm (228ms) #> i 2 of 2 tuning: default_maxent #> ✔ 2 of 2 tuning: default_maxent (7.1s) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> ✔ 1 of 2 resampling: default_glm (235ms) #> i 2 of 2 tuning: default_maxent #> ✔ 2 of 2 tuning: default_maxent (7.2s) lacerta_rep_ens <- repeat_ensemble() %>% add_repeat(ensemble_list) lacerta_rep_ens #> A repeat_ensemble of models #> #> Number of repeats: #> • 3 #> #> Members: #> • default_glm #> • default_maxent #> #> Available metrics: #> • boyce_cont #> • roc_auc #> • tss_max #> #> Metric used to tune workflows: #> • boyce_cont lacerta_rep_ens <- predict_raster(lacerta_rep_ens, climate_present, fun = c(\"mean\", \"median\") ) ggplot() + geom_spatraster(data = lacerta_rep_ens, aes(fill = median)) + scale_fill_terrain_c()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a1_palaeodata_application.html","id":"sdms-with-tidymodels-for-palaeo-data","dir":"Articles","previous_headings":"","what":"SDMs with tidymodels for palaeo data","title":"Application with palaeodata","text":"In this article, we show how a Species Distribution Model can be fitted with tidysdm on time-scattered (i.e. palaeontological, archaeozoological, or archaeological) data, with samples covering different time periods. We recommend that users first read the "tidysdm overview" article, which introduces a number of functions and concepts that are used in the present article.
We first load tidysdm:","code":"library(tidysdm) #> Loading required package: tidymodels #> ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ── #> ✔ broom 1.0.7 ✔ recipes 1.1.0 #> ✔ dials 1.3.0 ✔ rsample 1.2.1 #> ✔ dplyr 1.1.4 ✔ tibble 3.2.1 #> ✔ ggplot2 3.5.1 ✔ tidyr 1.3.1 #> ✔ infer 1.0.7 ✔ tune 1.2.1 #> ✔ modeldata 1.4.0 ✔ workflows 1.1.4 #> ✔ parsnip 1.2.1 ✔ workflowsets 1.1.0 #> ✔ purrr 1.0.2 ✔ yardstick 1.3.1 #> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ── #> ✖ purrr::discard() masks scales::discard() #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() #> ✖ recipes::step() masks stats::step() #> • Learn how to get started at https://www.tidymodels.org/start/ #> Loading required package: spatialsample"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a1_palaeodata_application.html","id":"preparing-your-data","dir":"Articles","previous_headings":"","what":"Preparing your data","title":"Application with palaeodata","text":"We start by loading a set of (calibrated) radiocarbon dates for horses, covering from 22k years ago to 8k years ago. We convert the dataset to an sf data.frame, so that we can easily plot it (this is where tidyterra shines): As a background to our presences, we use the land mask for the present, taken from pastclim, and cut it to cover only Europe: And we use tidyterra to plot it: We now thin the presences, so that locations are at least 100km and 2000 years apart. And see what is left: We now need a time series of palaeoclimate reconstructions. For this vignette, we use the example dataset from pastclim. This dataset has reconstructions every 5k years for the past 20k years at 1 degree resolution, with 3 bioclimatic variables. It will suffice for illustrative purposes, but we recommend downloading the higher quality datasets from pastclim for a real analysis. As for the land mask, we cut the reconstructions to cover only Europe: Now we thin the observations to keep only one per cell of the raster (it would be better to use an equal-area projection…), and remove locations outside the desired area: Let's see what points are left: Now we sample the pseudo-absences (with the constraint that they should be at least 70km away from any presences), selecting three times the number of presences: Let's see our presences and absences: Now let's get the climate for each location. pastclim requires a data frame with two columns with coordinates and a column of time in years before present (negative values represent time in the past).
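For orientation, pastclim's helpers convert between the two time representations used here (both functions also appear in the code below); a small illustrative example:

# convert years before present into a lubridate date, and back again
pastclim::ybp2date(-10000)
pastclim::date2ybp(pastclim::ybp2date(-10000)) # returns -10000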
manipulate sf object accordingly:","code":"data(horses) horses #> # A tibble: 788 × 3 #> latitude longitude time_bp #> #> 1 43.2 -2.04 -14000 #> 2 43.2 -2.04 -14000 #> 3 43.2 -2.04 -14000 #> 4 43.2 -2.04 -14000 #> 5 43.2 -2.04 -16000 #> 6 43.3 -1.89 -16000 #> 7 43.2 -2.2 -14000 #> 8 43.2 -2.2 -19000 #> 9 43.2 -2.2 -20000 #> 10 43.2 -2.2 -21000 #> # ℹ 778 more rows library(sf) #> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE horses <- st_as_sf(horses, coords = c(\"longitude\", \"latitude\")) st_crs(horses) <- 4326 #> Loading required package: terra #> terra 1.7.78 #> #> Attaching package: 'terra' #> The following object is masked from 'package:tidyr': #> #> extract #> The following object is masked from 'package:scales': #> #> rescale library(pastclim) land_mask <- pastclim::get_land_mask(time_bp = 0, dataset = \"Example\") europe_poly <- vect(region_outline$Europe) crs(europe_poly) <- \"lonlat\" land_mask <- crop(land_mask, europe_poly) land_mask <- mask(land_mask, europe_poly) library(tidyterra) #> #> Attaching package: 'tidyterra' #> The following object is masked from 'package:stats': #> #> filter ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_0)) + geom_sf(data = horses, aes(col = time_bp)) set.seed(123) horses <- thin_by_dist_time(horses, dist_min = km2m(100), interval_min = y2d(2000), time_col = \"time_bp\", lubridate_fun = pastclim::ybp2date ) nrow(horses) #> [1] 185 ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_0)) + geom_sf(data = horses, aes(col = time_bp)) library(pastclim) climate_vars <- c(\"bio01\", \"bio10\", \"bio12\") climate_full <- pastclim::region_series( bio_variables = climate_vars, data = \"Example\", crop = region_outline$Europe ) set.seed(123) horses <- thin_by_cell_time(horses, raster = climate_full, time_col = \"time_bp\", lubridate_fun = pastclim::ybp2date ) nrow(horses) #> [1] 138 ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_0)) + geom_sf(data = horses, aes(col = time_bp)) set.seed(123) horses <- sample_pseudoabs_time(horses, n_per_presence = 3, raster = climate_full, time_col = \"time_bp\", lubridate_fun = pastclim::ybp2date, method = c(\"dist_min\", km2m(70)) ) ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_0)) + geom_sf(data = horses, aes(col = class)) horses_df <- horses %>% dplyr::bind_cols(sf::st_coordinates(horses)) %>% mutate(time_bp = date2ybp(time_step)) %>% as.data.frame() %>% select(-geometry) # get climate horses_df <- location_slice_from_region_series(horses_df, region_series = climate_full ) # add the climate reconstructions to the sf object, and remove the time_step # as we don't need it for modelling horses <- horses %>% bind_cols(horses_df[, climate_vars]) %>% select(-time_step)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a1_palaeodata_application.html","id":"fit-the-model-by-crossvalidation","dir":"Articles","previous_headings":"","what":"Fit the model by crossvalidation","title":"Application with palaeodata","text":"Next, need set recipe define handle dataset. don’t want transform data, just need define formula (class outcome, variables predictors; note , sf objects, geometry automatically ignored predictor): can quickly check variables want : now build workflow_set different models, defining hyperparameters want tune. use glm, gam, random forest boosted trees models, random forest boosted trees tunable hyperparameters. 
For the most commonly used models, tidysdm automatically chooses the most important parameters, but it is possible to fully customise the model specifications. Note that gams are unusual, in that we need to specify a formula to define the variables to which we fit smooths. By default, gam_formula() fits a smooth to every continuous predictor, but a custom formula can be provided instead. We now want to set up a spatial block cross-validation scheme to tune and assess our models: We can now use the block CV folds to tune and assess the models: Note that workflow_set correctly detects that we have no tuning parameters for glm and gam. We can have a look at the performance of our models with: Now let's create an ensemble, selecting the best set of parameters for each model (this is only really relevant for the random forest and boosted trees, as there are no hyper-parameters to tune for the glm and gam). We will use the Boyce continuous index as our metric to choose the best random forest and boosted tree. When adding members to an ensemble, they are automatically fitted to the full training dataset, and so are ready to make predictions. We can then visualise the ensemble:","code":"horses_rec <- recipe(horses, formula = class ~ .) horses_rec #> #> ── Recipe ────────────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 3 #> coords: 2 horses_rec$var_info #> # A tibble: 6 × 4 #> variable type role source #> #> 1 bio01 predictor original #> 2 bio10 predictor original #> 3 bio12 predictor original #> 4 X coords original #> 5 Y coords original #> 6 class outcome original horses_models <- # create the workflow_set workflow_set( preproc = list(default = horses_rec), models = list( # the standard glm specs (no params to tune) glm = sdm_spec_glm(), # the standard sdm specs (no params to tune) gam = sdm_spec_gam(), # rf specs with tuning rf = sdm_spec_rf(), # boosted tree model (gbm) specs with tuning gbm = sdm_spec_boost_tree() ), # make all combinations of preproc and models, cross = TRUE ) %>% # set formula for gams update_workflow_model(\"default_gam\", spec = sdm_spec_gam(), formula = gam_formula(horses_rec) ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) library(tidysdm) set.seed(1005) horses_cv <- spatial_block_cv(horses, v = 5) autoplot(horses_cv) set.seed(123) horses_models <- horses_models %>% workflow_map(\"tune_grid\", resamples = horses_cv, grid = 5, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 4 resampling: default_glm #> ✔ 1 of 4 resampling: default_glm (263ms) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 2 of 4 resampling: default_gam #> ✔ 2 of 4 resampling: default_gam (680ms) #> i 3 of 4 tuning: default_rf #> i Creating pre-processing data to finalize unknown parameter: mtry #> ✔ 3 of 4 tuning: default_rf (2.4s) #> i 4 of 4 tuning: default_gbm #> i Creating pre-processing data to finalize unknown parameter: mtry #> ✔ 4 of 4 tuning: default_gbm (15.1s) autoplot(horses_models) horses_ensemble <- simple_ensemble() %>% add_member(horses_models, metric = \"boyce_cont\") autoplot(horses_ensemble)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a1_palaeodata_application.html","id":"projecting-to-other-times","dir":"Articles","previous_headings":"","what":"Projecting to other times","title":"Application with palaeodata","text":"We can now make predictions with the ensemble (using the default option of taking the mean of the predictions from each model) for the Last Glacial Maximum (LGM, 21,000 years ago).
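The region_slice/predict_raster pattern shown just below extends to any other time step available in the series; as an illustrative sketch (assuming the Example dataset's 5,000-year steps):

# illustrative: project to another available slice, e.g. 15,000 years ago
climate_15k <- pastclim::region_slice(
  time_bp = -15000,
  bio_variables = climate_vars,
  data = "Example",
  crop = region_outline$Europe
)
prediction_15k <- predict_raster(horses_ensemble, climate_15k)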
We then predict using the ensemble:","code":"climate_lgm <- pastclim::region_slice( time_bp = -20000, bio_variables = climate_vars, data = \"Example\", crop = region_outline$Europe ) prediction_lgm <- predict_raster(horses_ensemble, climate_lgm) ggplot() + geom_spatraster(data = prediction_lgm, aes(fill = mean)) + scale_fill_terrain_c()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a2_tidymodels_additions.html","id":"exploring-models-with-dalex","dir":"Articles","previous_headings":"","what":"Exploring models with DALEX","title":"Examples of additional tidymodels features","text":"An issue with machine learning algorithms is that it is not easy to understand the role of the different variables in giving the final prediction. A number of packages have been created to explore and explain the behaviour of ML algorithms, and they can be used with tidysdm. In the tidysdm overview article, we illustrated how to use recipes to create profiles. Here we demonstrate how to use DALEX, an excellent package that has methods to deal with tidymodels. tidysdm contains additional functions that allow the use of DALEX functions directly on tidysdm ensembles. We will use the simple ensemble that we built in the overview vignette. The first step in DALEX is to create an explainer object, which can then be queried by the different functions of the package, turning the explainer into an explanation (following the DALEX lingo). As a first step, we use the custom function explain_tidysdm to generate our explainer: Now that we have our explainer, we can explore variable importance for the ensemble: And we can generate partial dependency plots for a given variable (e.g. bio05): There are many other functions in DALEX that can be applied to the explainer to explore the behaviour of the model; see several tutorials at https://modeloriented.github.io/DALEX/ It is also possible to explore the individual models that make up the ensemble: The resulting list can then be used to build lists of explanations, which can then be plotted.","code":"library(tidysdm) #> Loading required package: tidymodels #> ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ── #> ✔ broom 1.0.7 ✔ recipes 1.1.0 #> ✔ dials 1.3.0 ✔ rsample 1.2.1 #> ✔ dplyr 1.1.4 ✔ tibble 3.2.1 #> ✔ ggplot2 3.5.1 ✔ tidyr 1.3.1 #> ✔ infer 1.0.7 ✔ tune 1.2.1 #> ✔ modeldata 1.4.0 ✔ workflows 1.1.4 #> ✔ parsnip 1.2.1 ✔ workflowsets 1.1.0 #> ✔ purrr 1.0.2 ✔ yardstick 1.3.1 #> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ── #> ✖ purrr::discard() masks scales::discard() #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() #> ✖ recipes::step() masks stats::step() #> • Learn how to get started at https://www.tidymodels.org/start/ #> Loading required package: spatialsample lacerta_ensemble #> A simple_ensemble of models #> #> Members: #> • default_glm #> • default_rf #> • default_gbm #> • default_maxent #> #> Available metrics: #> • boyce_cont #> • roc_auc #> • tss_max #> #> Metric used to tune workflows: #> • boyce_cont explainer_lacerta_ens <- explain_tidysdm(lacerta_ensemble) #> Preparation of a new explainer is initiated #> -> model label : data.frame ( default ) #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : predict_function #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidysdm , ver. 0.9.6.9002 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.01490969 , mean = 0.2861937 , max = 0.7169324 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.6465921 , mean = -0.03619367 , max = 0.7891973 #> A new explainer has been created! library(DALEX) #> Welcome to DALEX (version: 2.4.3).
#> Find examples and detailed introduction at: http://ema.drwhy.ai/ #> Additional features will be available after installation of: ggpubr. #> Use 'install_dependencies()' to get all suggested dependencies #> #> Attaching package: 'DALEX' #> The following object is masked from 'package:dplyr': #> #> explain vip_ensemble <- model_parts(explainer = explainer_lacerta_ens) plot(vip_ensemble) pdp_bio05 <- model_profile(explainer_lacerta_ens, N = 500, variables = \"bio05\") plot(pdp_bio05) explainer_list <- explain_tidysdm(tidysdm::lacerta_ensemble, by_workflow = TRUE) #> Preparation of a new explainer is initiated #> -> model label : default_glm #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.2280177 , mean = 0.75 , max = 0.9854359 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.9096205 , mean = 5.395921e-12 , max = 0.7719823 #> A new explainer has been created! #> Preparation of a new explainer is initiated #> -> model label : default_rf #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.1315421 , mean = 0.7480648 , max = 1 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.6878921 , mean = 0.001935171 , max = 0.5870619 #> A new explainer has been created! #> Preparation of a new explainer is initiated #> -> model label : default_gbm #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.3390188 , mean = 0.7314788 , max = 0.9632964 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.9268645 , mean = 0.01852121 , max = 0.6280424 #> A new explainer has been created! #> Preparation of a new explainer is initiated #> -> model label : default_maxent #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.1095764 , mean = 0.6256817 , max = 0.9960248 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.8207859 , mean = 0.1243183 , max = 0.8904236 #> A new explainer has been created! 
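# aside (an illustrative sketch, not part of the original article): the list of
# per-workflow explainers created above can be fed to any DALEX function, e.g.
# variable importance for each member of the ensemble:
vip_list <- lapply(explainer_list, DALEX::model_parts)
# each element of vip_list can then be inspected with plot()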
profile_list <- lapply(explainer_list, model_profile, N = 500, variables = \"bio05\" ) plot(profile_list)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a2_tidymodels_additions.html","id":"the-initial-split","dir":"Articles","previous_headings":"","what":"The initial split","title":"Examples of additional tidymodels features","text":"The standard approach in tidymodels is to make an initial split of the data into a test and a training set. We will retain 20% of the data (1/5) for the testing set, and use the rest for training. We start by loading a set of presences and absences and their associated climate, analogous to the one generated in the tidysdm overview article: We then use spatial_initial_split to do the split, using a spatial_block_cv scheme to partition the data: And check the balance of presences vs pseudoabsences: We can now extract the training set from our lacerta_initial split, and sample folds to set up cross-validation (note that we set the cellsize and offset based on the full dataset, lacerta_thin; this allows us to use the same grid we used for the initial_split). And we check the balance of the dataset:","code":"library(tidysdm) library(sf) #> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) set.seed(1005) lacerta_initial <- spatial_initial_split(lacerta_thin, prop = 1 / 5, spatial_block_cv ) autoplot(lacerta_initial) check_splits_balance(lacerta_initial, class) #> # A tibble: 1 × 4 #> presence_test pseudoabs_test presence_train pseudoabs_train #> #> 1 88 267 25 72 set.seed(1005) lacerta_training <- training(lacerta_initial) lacerta_cv <- spatial_block_cv(lacerta_training, v = 5, cellsize = grid_cellsize(lacerta_thin), offset = grid_offset(lacerta_thin) ) autoplot(lacerta_cv) check_splits_balance(lacerta_cv, class) #> # A tibble: 5 × 4 #> presence_assessment pseudoabs_assessment presence_analysis pseudoabs_analysis #> #> 1 74 197 14 70 #> 2 59 225 29 42 #> 3 73 220 15 47 #> 4 76 209 12 58 #> 5 70 218 18 49"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a2_tidymodels_additions.html","id":"different-recipes-for-certain-models","dir":"Articles","previous_headings":"","what":"Different recipes for certain models","title":"Examples of additional tidymodels features","text":"Certain types of models (e.g. glm, svm) struggle with correlated variables; other algorithms, such as random forests, can handle correlated variables. So, we will create two recipes: one with all the variables, and one with only the uncorrelated ones: We now use the two recipes in a workflowset (we keep it small to limit computational time), selecting the appropriate recipe for each model. We include a model (polynomial support vector machines, SVM) that does not have a wrapper in tidysdm for creating the model specification. However, we can use a standard model spec from parsnip: We can now use the block CV folds to tune and assess the models. Note that there are multiple tuning approaches, besides the standard grid method. Here we will use tune_bayes from the tune package (see its help page to see how a Gaussian Process model is used to choose parameter combinations). This tuning method (as opposed to the standard grid) does not allow for hyper-parameters with unknown limits, but mtry for random forest has an undefined upper range, as it depends on the number of variables in the dataset. So, before tuning, we need to finalise mtry by informing the set of dials with the actual data: And now we can tune the models: We can then look at the performance of our models with:","code":"lacerta_rec_all <- recipe(lacerta_thin, formula = class ~ .)
lacerta_rec_uncor <- lacerta_rec_all %>% step_rm(all_of(c( \"bio01\", \"bio02\", \"bio03\", \"bio04\", \"bio07\", \"bio08\", \"bio09\", \"bio10\", \"bio11\", \"bio12\", \"bio14\", \"bio16\", \"bio17\", \"bio18\", \"bio19\", \"altitude\" ))) lacerta_rec_uncor #> #> ── Recipe ────────────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 20 #> coords: 2 #> #> ── Operations #> • Variables removed: all_of(c(\"bio01\", \"bio02\", \"bio03\", \"bio04\", \"bio07\", #> \"bio08\", \"bio09\", \"bio10\", \"bio11\", \"bio12\", \"bio14\", \"bio16\", \"bio17\", #> \"bio18\", \"bio19\", \"altitude\")) lacerta_models <- # create the workflow_set workflow_set( preproc = list( uncor = lacerta_rec_uncor, # recipe for the glm all = lacerta_rec_all, # recipe for the random forest all = lacerta_rec_uncor # recipe for svm ), models = list( # the standard glm specs glm = sdm_spec_glm(), # rf specs with tuning rf = sdm_spec_rf(), # svm specs with tuning svm = parsnip::svm_poly( cost = tune(), degree = tune() ) %>% parsnip::set_engine(\"kernlab\") %>% parsnip::set_mode(\"classification\") ), # make all combinations of preproc and models, cross = FALSE ) %>% # tweak controls to store information needed later to create the ensemble # note that we use the bayes version as we will use a Bayes search (see later) option_add(control = stacks::control_stack_bayes()) rf_param <- lacerta_models %>% # extract the rf workflow extract_workflow(\"all_rf\") %>% # extract its parameter dials (used to tune) extract_parameter_set_dials() %>% # give it the predictors to finalize mtry finalize(x = st_drop_geometry(lacerta_thin) %>% select(-class)) # now update the workflowset with the new parameter info lacerta_models <- lacerta_models %>% option_add(param_info = rf_param, id = \"all_rf\") set.seed(1234567) lacerta_models <- lacerta_models %>% workflow_map(\"tune_bayes\", resamples = lacerta_cv, initial = 8, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 3 resampling: uncor_glm #> ✔ 1 of 3 resampling: uncor_glm (316ms) #> i 2 of 3 tuning: all_rf #> ! No improvement for 10 iterations; returning current results. #> ✔ 2 of 3 tuning: all_rf (16s) #> i 3 of 3 tuning: all_svm #> ✔ 3 of 3 tuning: all_svm (21.2s) autoplot(lacerta_models)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a2_tidymodels_additions.html","id":"stack-ensembles","dir":"Articles","previous_headings":"","what":"Stack ensembles","title":"Examples of additional tidymodels features","text":"Instead of building a simple ensemble with the best version of each model type, we can build a stack ensemble, as implemented in the package stacks. Stacking uses a meta-learning algorithm to learn how to best combine multiple models, including multiple versions of the same algorithm with different hyper-parameters. We can see that three versions of the SVM and one of the random forests were selected; the stacking coefficients give an indication of the weight each model carries within the ensemble. We can now use the ensemble to make predictions on the testing data: And look at the goodness of fit using the commonly used sdm metrics. Note that sdm_metric_set is first invoked to generate a function (with the empty ()), which is then used on the data. We can now make predictions with the stacked ensemble.
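Before projecting, the composition of the stack can be inspected further; as an illustrative sketch (assuming the "members" plot type offered by the stacks package, alongside the "weights" type used above):

# illustrative: compare member-level predictions with the blended stack
autoplot(lacerta_stack, type = "members")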
We start by extracting the climate variables of interest:","code":"library(stacks) set.seed(1005) lacerta_stack <- # initialize the stack stacks() %>% # add candidate members add_candidates(lacerta_models) %>% # determine how to combine their predictions blend_predictions() %>% # fit the candidates with non-zero weights (i.e. non-zero stacking coefficients) fit_members() autoplot(lacerta_stack, type = \"weights\") lacerta_testing <- testing(lacerta_initial) lacerta_test_pred <- lacerta_testing %>% bind_cols(predict(lacerta_stack, ., type = \"prob\")) sdm_metric_set()(data = lacerta_test_pred, truth = class, .pred_presence) #> # A tibble: 3 × 3 #> .metric .estimator .estimate #> #> 1 boyce_cont binary 0.853 #> 2 roc_auc binary 0.986 #> 3 tss_max binary 0.92 download_dataset(\"WorldClim_2.1_10m\") climate_vars <- lacerta_rec_all$var_info %>% filter(role == \"predictor\") %>% pull(variable) climate_present <- pastclim::region_slice( time_ce = 1985, bio_variables = climate_vars, data = \"WorldClim_2.1_10m\", crop = iberia_poly ) prediction_present <- predict_raster(lacerta_stack, climate_present, type = \"prob\" ) library(tidyterra) #> #> Attaching package: 'tidyterra' #> The following object is masked from 'package:stats': #> #> filter ggplot() + geom_spatraster(data = prediction_present, aes(fill = .pred_presence)) + scale_fill_terrain_c() + # plot presences used in the model geom_sf(data = lacerta_thin %>% filter(class == \"presence\"))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a2_tidymodels_additions.html","id":"using-multi-level-factors-as-predictors","dir":"Articles","previous_headings":"","what":"Using multi-level factors as predictors","title":"Examples of additional tidymodels features","text":"Not all machine learning algorithms can natively use multilevel factors as predictors. The solution is to create dummy variables, which are binary variables that represent the levels of the factor. In tidymodels, this is done using the step_dummy function. Let's create a factor variable with 3 levels based on altitude. We then create a recipe, adding a step to create dummy variables for the topography variable. Let us see what it does: It added two \"derived\" variables, topography_hills and topography_mountains, which are binary variables that allow us to code topography (plains is used as the reference level, and is coded by both hills and mountains being 0 for a given location). We can look at the first few rows of the data, and see the new variables, by baking the recipe: We can now run the sdm as usual: We can now verify that the dummy variables were used by extracting the model fit of one of the models in the ensemble: We can see the coefficients for topography_hills and topography_mountains. Let us now predict the presence of the lizard in the Iberian Peninsula using the ensemble. Note that, for predict_raster() to work, the names of the levels of the categorical variable need to match those used when training the models (i.e. in the recipe with step_dummy()):","code":"library(tidysdm) # load the dataset lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) # create a topography variable with 3 levels based on altitude lacerta_thin$topography <- cut(lacerta_thin$altitude, breaks = c(-Inf, 200, 800, Inf), labels = c(\"plains\", \"hills\", \"mountains\")) table(lacerta_thin$topography) #> #> plains hills mountains #> 82 233 137 # subset to variable of interest lacerta_thin <- lacerta_thin %>% select(class, bio05, bio06, bio12, bio15, topography) lacerta_rec <- recipe(lacerta_thin, formula = class ~ .)
%>% step_dummy(topography) lacerta_rec #> #> ── Recipe ────────────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 5 #> coords: 2 #> #> ── Operations #> • Dummy variables from: topography lacerta_prep <- prep(lacerta_rec) summary(lacerta_prep) #> # A tibble: 9 × 4 #> variable type role source #> #> 1 bio05 predictor original #> 2 bio06 predictor original #> 3 bio12 predictor original #> 4 bio15 predictor original #> 5 X coords original #> 6 Y coords original #> 7 class outcome original #> 8 topography_hills predictor derived #> 9 topography_mountains predictor derived lacerta_bake <- bake(lacerta_prep, new_data = lacerta_thin) glimpse(lacerta_bake) #> Rows: 452 #> Columns: 9 #> $ bio05 30.50350, 25.28050, 23.67800, 29.68875, 26.34075,… #> $ bio06 1.477000, 3.631750, 0.789500, 6.048750, 1.869000,… #> $ bio12 596, 1490, 1395, 729, 1324, 1409, 1260, 1390, 116… #> $ bio15 50.59533, 50.07437, 47.24211, 58.88199, 51.62960,… #> $ X -5.394226, -8.374844, -7.886102, -8.231414, -7.17… #> $ Y 39.48495, 41.97207, 41.89992, 39.49710, 41.78401,… #> $ class presence, presence, presence, presence, presence,… #> $ topography_hills 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0… #> $ topography_mountains 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1… # define the models lacerta_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_rec), models = list( # the standard glm specs glm = sdm_spec_glm(), # rf specs with tuning rf = sdm_spec_rf() ), # make all combinations of preproc and models, cross = TRUE ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) # tune set.seed(100) lacerta_cv <- spatial_block_cv(lacerta_thin, v = 3) lacerta_models <- lacerta_models %>% workflow_map(\"tune_grid\", resamples = lacerta_cv, grid = 3, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> ✔ 1 of 2 resampling: default_glm (192ms) #> i 2 of 2 tuning: default_rf #> i Creating pre-processing data to finalize unknown parameter: mtry #> ✔ 2 of 2 tuning: default_rf (967ms) # fit the ensemble lacerta_ensemble <- simple_ensemble() %>% add_member(lacerta_models, metric = \"boyce_cont\") lacerta_ensemble$workflow[[1]] %>% extract_fit_parsnip() #> parsnip model object #> #> #> Call: stats::glm(formula = ..y ~ ., family = stats::binomial, data = data) #> #> Coefficients: #> (Intercept) bio05 bio06 #> -6.920024 0.635493 -0.334427 #> bio12 bio15 topography_hills #> -0.002505 -0.113106 -1.851098 #> topography_mountains #> -2.531259 #> #> Degrees of Freedom: 451 Total (i.e. 
Null); 445 Residual #> Null Deviance: 508.4 #> Residual Deviance: 189.5 AIC: 203.5 climate_present <- terra::readRDS(system.file(\"extdata/lacerta_climate_present_10m.rds\", package = \"tidysdm\")) # first we add a topography variable to the climate data climate_present$topography <- climate_present$altitude climate_present$topography <- terra::classify(climate_present$topography, rcl = c(-Inf, 200, 800, Inf), include.lowest=TRUE, brackets=TRUE) library(terra) #> terra 1.7.78 #> #> Attaching package: 'terra' #> The following objects are masked from 'package:kernlab': #> #> buffer, size #> The following object is masked from 'package:tidyr': #> #> extract #> The following object is masked from 'package:scales': #> #> rescale levels(climate_present$topography) <- data.frame(ID = c(0,1,2), topography = c(\"plains\", \"hills\", \"mountains\")) # now we can predict predict_factor <- predict_raster(lacerta_ensemble, climate_present) plot(predict_factor)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a3_troubleshooting.html","id":"nas-in-the-data","dir":"Articles","previous_headings":"","what":"NAs in the data","title":"Troubleshooting models that fail","text":"Not all algorithms allow NAs. We can generate a problematic dataset by loading the Lacerta dataset, and manually adding an NA: Let us set up a recipe and fit a workflow_set: We can see that the error is self-explanatory. Also, note that the error impacts all data splits (technically, the rset objects): the error is repeated 15 times (5 splits by 3 hyperparameter values). Prepping the recipe (which trains it on the dataset) can help in diagnosing problems: Note that, in the training information, we were warned that 1 row was incomplete. We could use step_naomit to deal with this programmatically, or ascertain what is generating the missing data (we prefer the latter, as a good SDM pipeline should not generate observations, whether presences or pseudoabsences, with missing data).","code":"library(tidysdm) #> Loading required package: tidymodels #> ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ── #> ✔ broom 1.0.7 ✔ recipes 1.1.0 #> ✔ dials 1.3.0 ✔ rsample 1.2.1 #> ✔ dplyr 1.1.4 ✔ tibble 3.2.1 #> ✔ ggplot2 3.5.1 ✔ tidyr 1.3.1 #> ✔ infer 1.0.7 ✔ tune 1.2.1 #> ✔ modeldata 1.4.0 ✔ workflows 1.1.4 #> ✔ parsnip 1.2.1 ✔ workflowsets 1.1.0 #> ✔ purrr 1.0.2 ✔ yardstick 1.3.1 #> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ── #> ✖ purrr::discard() masks scales::discard() #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() #> ✖ recipes::step() masks stats::step() #> • Use suppressPackageStartupMessages() to eliminate package startup messages #> Loading required package: spatialsample lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) lacerta_thin$bio05[37] <- NA lacerta_rec <- recipe(lacerta_thin, formula = class ~ .)
%>% step_rm(all_of(c( \"bio01\", \"bio02\", \"bio03\", \"bio04\", \"bio07\", \"bio08\", \"bio09\", \"bio10\", \"bio11\", \"bio12\", \"bio14\", \"bio16\", \"bio17\", \"bio18\", \"bio19\", \"altitude\" ))) lacerta_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_rec), models = list( # the standard glm specs glm = sdm_spec_glm(), # rf specs with tuning rf = sdm_spec_rf() ), # make all combinations of preproc and models, cross = TRUE ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) set.seed(100) lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5) lacerta_models <- lacerta_models %>% workflow_map(\"tune_grid\", resamples = lacerta_cv, grid = 3, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> ✔ 1 of 2 resampling: default_glm (320ms) #> i 2 of 2 tuning: default_rf #> i Creating pre-processing data to finalize unknown parameter: mtry #> → A | error: Missing data in columns: bio05. #> There were issues with some computations A: x1 #> There were issues with some computations A: x15 #> #> Warning: All models failed. Run `show_notes(.Last.tune.result)` for more #> information. #> Warning: Unknown or uninitialised column: `.notes`. #> ✖ 2 of 2 tuning: default_rf failed with lacerta_prep <- lacerta_rec %>% prep(lacerta_thin) lacerta_prep #> #> ── Recipe ────────────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 20 #> coords: 2 #> #> ── Training information #> Training data contained 452 data points and 1 incomplete row. #> #> ── Operations #> • Variables removed: bio01, bio02, bio03, bio04, bio07, bio08, ... | Trained"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a3_troubleshooting.html","id":"recipes-and-the-response-variable","dir":"Articles","previous_headings":"","what":"Recipes and the response variable","title":"Troubleshooting models that fail","text":"response variable treated special way recipes, can lead problems. best manipulate (e.g. transform character factor) response variable recipe, since response variable available train test models, make projections. hard-coded step recipe includes response variable, model fit, fail start making predictions. Another potential mistake remove response variable selecting variables interest. can happen use step_select choose variables interest, error less clear: Let’s load data create recipe step_select: Now create workflow set fit : errors intuitive. However, models failed algorithms, suggests problem lies data preparation side (either data , recipe). Ideally, already look data (summary glimpse). , case, know data fine. Whilst prepping (sometimes baking) recipe generally informative predictor variables, hard diagnose problems outcome variable recipe. Prepping show anything obvious: case, process exclusion. Everything seems fine, models don’t work. ask outcome variable might problematic. general rule, found easier rely step_rm remove variables (e.g. correlated variables).","code":"lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) suggested_vars <- c(\"bio05\", \"bio06\", \"bio13\", \"bio14\", \"bio15\") lacerta_rec_sel <- recipe(lacerta_thin, formula = class ~ .) 
%>% step_select(all_of(suggested_vars)) lacerta_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_rec_sel), models = list( # the standard glm specs glm = sdm_spec_glm(), # rf specs with tuning rf = sdm_spec_rf() ), # make all combinations of preproc and models, cross = TRUE ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) set.seed(100) lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5) lacerta_models <- lacerta_models %>% workflow_map(\"tune_grid\", resamples = lacerta_cv, grid = 3, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> → A | error: ! `logistic_reg()` was unable to find an outcome. #> ℹ Ensure that you have specified an outcome column and that it hasn't been #> removed in pre-processing. #> Warning: All models failed. Run `show_notes(.Last.tune.result)` for more #> information. #> Warning: Unknown or uninitialised column: `.notes`. #> ✖ 1 of 2 resampling: default_glm failed with #> i 2 of 2 tuning: default_rf #> i Creating pre-processing data to finalize unknown parameter: mtry #> → A | error: ! `rand_forest()` was unable to find an outcome. #> ℹ Ensure that you have specified an outcome column and that it hasn't been #> removed in pre-processing. #> There were issues with some computations A: x2 #> There were issues with some computations A: x15 #> #> Warning: All models failed. Run `show_notes(.Last.tune.result)` for more information. #> Unknown or uninitialised column: `.notes`. #> ✖ 2 of 2 tuning: default_rf failed with lacerta_prep_sel <- lacerta_rec_sel %>% prep(lacerta_thin) lacerta_prep_sel #> #> ── Recipe ────────────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 20 #> coords: 2 #> #> ── Training information #> Training data contained 452 data points and no incomplete rows. #> #> ── Operations #> • Variables selected: bio05, bio06, bio13, bio14, bio15 | Trained"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a3_troubleshooting.html","id":"using-the-desired-formula-with-gam","dir":"Articles","previous_headings":"","what":"Using the desired formula with GAM","title":"Troubleshooting models that fail","text":"General Additive Models unusual syntax, user define variables fitted splines. tidysdm functions simplify process, assuming user just wants fit standard smooth every continuous predictor. Note step defining formula incompatible using step_cor recipe. step_cor removes correlated variables recipes, using similar algorithm filter_collinear using method cor_caret. However, algorithm fitted data split cross-validating. means different variables eventually presented model fitted split, leading error mismatch formula available variables. known issue GAMs implemented tidymodels.","code":"lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) lacerta_rec <- recipe(lacerta_thin, formula = class ~ .) 
%>% step_rm(all_of(c( \"bio01\", \"bio02\", \"bio03\", \"bio04\", \"bio07\", \"bio08\", \"bio09\", \"bio10\", \"bio11\", \"bio12\", \"bio14\", \"bio16\", \"bio17\", \"bio18\", \"bio19\", \"altitude\" ))) lacerta_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_rec), models = list( # the standard glm specs glm = sdm_spec_glm(), # the standard gam specs gam = sdm_spec_gam() ), # make all combinations of preproc and models, cross = TRUE ) %>% # set formula for gams update_workflow_model(\"default_gam\", spec = sdm_spec_gam(), formula = gam_formula(lacerta_rec) ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) set.seed(100) lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5) lacerta_models <- lacerta_models %>% workflow_map(\"tune_grid\", resamples = lacerta_cv, grid = 3, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> ✔ 1 of 2 resampling: default_glm (271ms) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 2 of 2 resampling: default_gam #> ✔ 2 of 2 resampling: default_gam (1.4s)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a3_troubleshooting.html","id":"when-only-some-splits-fail","dir":"Articles","previous_headings":"","what":"When only some splits fail","title":"Troubleshooting models that fail","text":"examples , splits used cross-validation given algorithms failed. However, also possible failures occur splits certain algorithms (technically, specific rsplit within certain workflows). type problem occurs, best extract problematic workflow, potentially investigate fitting specific rsplit. generate problematic dataset subsampling lacerta dataset: create 3 folds attempt fit models: see one folds gives us error using GAMs. error (“Fitting terminated step failure - check results carefully”) comes gam function package mgcv. quick google StackOverflow[https://stats.stackexchange.com/questions/576273/gam-model-warning-message-step-failure--theta-estimation] gives us idea error comes . start extracting results gam fits: see , .notes column, second item empty (zero rows). can check indeed contains error wanted: can now get problematic data split, extract training data: case, nothing obvious leads error (important check make sure enough presences split; presences generally lead errors). can now extract workflow refit split confirm isolated problem: next step dig deeper data, trying understand whether outliers problematic. specific steps depend algorithm giving problems.","code":"lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) set.seed(123) lacerta_thin <- lacerta_thin[sample( 1:nrow(lacerta_thin), nrow(lacerta_thin) / 5 ), ] lacerta_rec <- recipe(lacerta_thin, formula = class ~ .) 
%>% step_rm(all_of(c( \"bio01\", \"bio02\", \"bio03\", \"bio04\", \"bio07\", \"bio08\", \"bio09\", \"bio10\", \"bio11\", \"bio12\", \"bio14\", \"bio16\", \"bio17\", \"bio18\", \"bio19\", \"altitude\" ))) lacerta_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_rec), models = list( # the standard glm specs glm = sdm_spec_glm(), # the standard gam specs gam = sdm_spec_gam(), # rf specs with tuning rf = sdm_spec_rf() ), # make all combinations of preproc and models, cross = TRUE ) %>% # set formula for gams update_workflow_model(\"default_gam\", spec = sdm_spec_gam(), formula = gam_formula(lacerta_rec) ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) set.seed(100) lacerta_cv <- spatial_block_cv(lacerta_thin, v = 3) lacerta_models <- lacerta_models %>% workflow_map(\"tune_grid\", resamples = lacerta_cv, grid = 3, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 3 resampling: default_glm #> ✔ 1 of 3 resampling: default_glm (188ms) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 2 of 3 resampling: default_gam #> → A | warning: Fitting terminated with step failure - check results carefully #> There were issues with some computations A: x1 #> There were issues with some computations A: x1 #> #> ✔ 2 of 3 resampling: default_gam (1.6s) #> i 3 of 3 tuning: default_rf #> i Creating pre-processing data to finalize unknown parameter: mtry #> ✔ 3 of 3 tuning: default_rf (473ms) gam_results <- extract_workflow_set_result(lacerta_models, id = \"default_gam\") gam_results #> # Resampling results #> # 3-fold spatial block cross-validation #> # A tibble: 3 × 5 #> splits id .metrics .notes .predictions #> #> 1 Fold1 #> 2 Fold2 #> 3 Fold3 #> #> There were issues with some computations: #> #> - Warning(s) x1: Fitting terminated with step failure - check results carefully #> #> Run `show_notes(.Last.tune.result)` for more information. gam_results$.notes[2] #> [[1]] #> # A tibble: 1 × 3 #> location type note #> #> 1 preprocessor 1/1, model 1/1 warning Fitting terminated with step failure - ch… problem_split <- gam_results$splits[2][[1]] summary(training(problem_split)) #> class geometry bio01 bio02 #> presence :18 POINT :63 Min. : 4.74 Min. : 6.737 #> pseudoabs:45 epsg:4326 : 0 1st Qu.:11.81 1st Qu.: 9.336 #> +proj=long...: 0 Median :13.09 Median :10.937 #> Mean :12.88 Mean :11.052 #> 3rd Qu.:14.82 3rd Qu.:12.649 #> Max. :17.87 Max. :14.037 #> bio03 bio04 bio05 bio06 #> Min. :34.30 Min. :341.2 Min. :19.90 Min. :-6.2732 #> 1st Qu.:39.30 1st Qu.:500.8 1st Qu.:24.91 1st Qu.:-0.6787 #> Median :40.55 Median :610.8 Median :28.59 Median : 1.1918 #> Mean :40.54 Mean :584.6 Mean :28.57 Mean : 1.2175 #> 3rd Qu.:42.19 3rd Qu.:656.1 3rd Qu.:32.31 3rd Qu.: 3.5664 #> Max. :46.98 Max. :756.7 Max. :35.31 Max. : 8.2344 #> bio07 bio08 bio09 bio10 #> Min. :16.40 Min. : 1.922 Min. : 1.588 Min. :12.86 #> 1st Qu.:23.32 1st Qu.: 7.716 1st Qu.:16.995 1st Qu.:18.53 #> Median :27.88 Median : 9.668 Median :19.828 Median :20.51 #> Mean :27.35 Mean : 9.450 Mean :18.938 Mean :20.48 #> 3rd Qu.:31.49 3rd Qu.:11.341 3rd Qu.:22.607 3rd Qu.:23.08 #> Max. :35.27 Max. :16.882 Max. :25.470 Max. :25.71 #> bio11 bio12 bio13 bio14 #> Min. :-2.060 Min. : 249.0 Min. : 36.0 Min. 
: 2.00 #> 1st Qu.: 4.968 1st Qu.: 452.0 1st Qu.: 59.0 1st Qu.: 8.00 #> Median : 6.236 Median : 628.0 Median : 91.0 Median :17.00 #> Mean : 6.268 Mean : 757.8 Mean :101.5 Mean :21.97 #> 3rd Qu.: 8.455 3rd Qu.:1016.5 3rd Qu.:119.0 3rd Qu.:30.50 #> Max. :11.795 Max. :1622.0 Max. :248.0 Max. :74.00 #> bio15 bio16 bio17 bio18 #> Min. :13.44 Min. : 96.0 Min. : 17.00 Min. : 22.0 #> 1st Qu.:30.07 1st Qu.:157.0 1st Qu.: 43.00 1st Qu.: 47.0 #> Median :38.97 Median :249.0 Median : 71.00 Median : 78.0 #> Mean :41.58 Mean :280.3 Mean : 88.08 Mean : 96.0 #> 3rd Qu.:54.30 3rd Qu.:334.0 3rd Qu.:109.50 3rd Qu.:117.5 #> Max. :71.59 Max. :714.0 Max. :253.00 Max. :253.0 #> bio19 altitude #> Min. : 68.0 Min. : 38.0 #> 1st Qu.:128.5 1st Qu.: 319.5 #> Median :225.0 Median : 689.0 #> Mean :252.5 Mean : 685.5 #> 3rd Qu.:319.5 3rd Qu.: 855.0 #> Max. :714.0 Max. :1926.0 gam_workflow <- extract_workflow(lacerta_models, id = \"default_gam\") faulty_gam <- fit(gam_workflow, training(problem_split)) #> Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L, #> : Fitting terminated with step failure - check results carefully"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Michela Leonardi. Author. Margherita Colucci. Author. Andrea Vittorio Pozzi. Author. Eleanor M.L. Scerri. Author. Andrea Manica. Author, maintainer.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Leonardi M, Colucci M, Pozzi , Scerri E, Manica (2024). tidysdm: Species Distribution Models Tidymodels. R package version 0.9.6.9002, https://evolecolgroup.github.io/tidysdm/, https://github.com/EvolEcolGroup/tidysdm.","code":"@Manual{, title = {tidysdm: Species Distribution Models with Tidymodels}, author = {Michela Leonardi and Margherita Colucci and Andrea Vittorio Pozzi and Eleanor M.L. Scerri and Andrea Manica}, year = {2024}, note = {R package version 0.9.6.9002, https://evolecolgroup.github.io/tidysdm/}, url = {https://github.com/EvolEcolGroup/tidysdm}, }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/index.html","id":"tidysdm-","dir":"","previous_headings":"","what":"Species Distribution Models with Tidymodels","title":"Species Distribution Models with Tidymodels","text":"goal tidysdm implement Species Distribution Models using tidymodels framework. advantage tidymodels model syntax results returned user standardised, thus providing coherent interface modelling. Given variety models required SDM, tidymodels ideal framework. tidysdm provides number wrappers specialised functions facilitate fitting SDM tidymodels. Besides modelling contemporary species, tidysdm number functions specifically designed work palaeontological data. Whilst users free use environmental data, articles showcase potential integration pastclim, helps downloading manipulating present day data, future predictions, palaeoclimate reconstructions. overview capabilities tidysdm given Leonardi et al. (2023).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Species Distribution Models with Tidymodels","text":"tidysdm CRAN, easiest way install : version CRAN recommended every day use. New features bug fixes appear first dev branch GitHub, make way CRAN. 
need early access new features, can install tidysdm directly GitHub. install GitHub, need use devtools; haven’t done already, get CRAN install.packages(\"devtools\"). can install latest dev version tidysdm GitHub :","code":"install.packages(\"tidysdm\") # install.packages(\"devtools\") devtools::install_github(\"EvolEcolGroup/tidysdm\", ref = \"dev\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/index.html","id":"overview-of-functionality","dir":"","previous_headings":"","what":"Overview of functionality","title":"Species Distribution Models with Tidymodels","text":"dedicated website, can find Articles giving step--step overview fitting SDMs contemporary species, well equivalent tutorial using palaeontological data. Furthermore, Article examples leverage various features tidymodels commonly adopted SDM pipelines also dev version site updated dev branch tidysdm (top left dev website, version number red format x.x.x.9xxx, indicating development version). want contribute, make sure read contributing guide.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/index.html","id":"when-something-does-not-work","dir":"","previous_headings":"","what":"When something does not work","title":"Species Distribution Models with Tidymodels","text":"get error trying fit model? tidysdm relatively new package, might well , get error, might encountered bug. However, also possible misspecified model (error comes tidymodels, model valid). prepared Article diagnose failing models. fully comprehensive list everything go wrong, hopefully give ideas dig deeper wrong. also check issues GitHub see whether problem already reported. convinced problem bug tidysdm, feel free create new issue. Please make sure updated latest version tidysdm, well updating packages system, provide reproducible example developers investigate problem. think can help fixing bug, read contributing guide.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_member.html","id":null,"dir":"Reference","previous_headings":"","what":"Add best member of workflow to a simple ensemble — add_member","title":"Add best member of workflow to a simple ensemble — add_member","text":"function adds member(s) simple_ensemble() object, taking best member workflow provided. possible pass individual tune_results objects tuned workflow, workflowsets::workflow_set().","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_member.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add best member of workflow to a simple ensemble — add_member","text":"","code":"add_member(x, member, ...) # Default S3 method add_member(x, member, ...) # S3 method for class 'tune_results' add_member(x, member, metric = NULL, id = NULL, ...) # S3 method for class 'workflow_set' add_member(x, member, metric = NULL, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_member.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add best member of workflow to a simple ensemble — add_member","text":"x simple_ensemble member(s) added member tune_results, workflowsets::workflow_set ... used moment. metric character string (NULL) metric optimize. NULL, first metric used. 
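[Editor's aside on add_member() itself: a minimal sketch of building an ensemble, reusing the two_class_res tuning results from workflowsets that the examples further down also draw on.]

library(tidysdm)
# two_class_res is a data object shipped by workflowsets
data("two_class_res", package = "workflowsets")
# add the best member of a tuned workflow, ranked by roc_auc
two_class_ens <- simple_ensemble() %>%
  add_member(two_class_res, metric = "roc_auc")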
id name given workflow wflow_id column.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_member.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add best member of workflow to a simple ensemble — add_member","text":"simple_ensemble additional member(s)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_repeat.html","id":null,"dir":"Reference","previous_headings":"","what":"Add repeat(s) to a repeated ensemble — add_repeat","title":"Add repeat(s) to a repeated ensemble — add_repeat","text":"function adds repeat(s) repeat_ensemble object, repeat simple_ensemble. repeats must contain members, selected using metric.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_repeat.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add repeat(s) to a repeated ensemble — add_repeat","text":"","code":"add_repeat(x, rep, ...) # Default S3 method add_repeat(x, rep, ...) # S3 method for class 'simple_ensemble' add_repeat(x, rep, ...) # S3 method for class 'list' add_repeat(x, rep, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_repeat.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add repeat(s) to a repeated ensemble — add_repeat","text":"x repeat_ensemble repeat(s) added rep repeat, single simple_ensemble, list simple_ensemble objects ... used moment.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_repeat.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add repeat(s) to a repeated ensemble — add_repeat","text":"repeat_ensemble additional repeat(s)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.simple_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot the results of a simple ensemble — autoplot.simple_ensemble","title":"Plot the results of a simple ensemble — autoplot.simple_ensemble","text":"autoplot() method plots performance metrics ranked using metric.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.simple_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot the results of a simple ensemble — autoplot.simple_ensemble","text":"","code":"# S3 method for class 'simple_ensemble' autoplot( object, rank_metric = NULL, metric = NULL, std_errs = stats::qnorm(0.95), ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.simple_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot the results of a simple ensemble — autoplot.simple_ensemble","text":"object simple_ensemble whose elements results. rank_metric character string metric used rank results. none given, first metric metric set used (filtering metric option). metric character vector metrics (apart rank_metric) included visualization. NULL (default), available metrics plotted std_errs number standard errors plot (standard error exists). ... options pass autoplot(). 
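[Likewise for add_repeat(): a minimal sketch, assuming ensemble_list is a list of simple_ensemble objects fitted to repeated sets of pseudoabsences.]

# combine repeated simple ensembles into a single repeat_ensemble
lacerta_rep_ens <- repeat_ensemble() %>%
  add_repeat(ensemble_list)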
Currently unused.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.simple_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Plot the results of a simple ensemble — autoplot.simple_ensemble","text":"ggplot object.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.simple_ensemble.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Plot the results of a simple ensemble — autoplot.simple_ensemble","text":"function intended produce default plot visualize helpful information across possible applications simple_ensemble. sophisticated plots can produced using standard ggplot2 code plotting. x-axis workflow rank set (value one best) versus performance metric(s) y-axis. multiple metrics, facets metric, rank_metric first (provided; otherwise metric used create simple_ensemble used). multiple resamples used, confidence bounds shown result (95% confidence, default).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.simple_ensemble.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot the results of a simple ensemble — autoplot.simple_ensemble","text":"","code":"# \\donttest{ #' # we use the two_class_example from `workflowsets` two_class_ens <- simple_ensemble() %>% add_member(two_class_res, metric = \"roc_auc\") #> #> Attaching package: ‘plotrix’ #> The following object is masked from ‘package:scales’: #> #> rescale autoplot(two_class_ens) # }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.spatial_initial_split.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a ggplot for a spatial initial rsplit. — autoplot.spatial_initial_split","title":"Create a ggplot for a spatial initial rsplit. — autoplot.spatial_initial_split","text":"method provides good visualization method spatial initial rsplit.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.spatial_initial_split.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a ggplot for a spatial initial rsplit. — autoplot.spatial_initial_split","text":"","code":"# S3 method for class 'spatial_initial_split' autoplot(object, ..., alpha = 0.6)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.spatial_initial_split.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a ggplot for a spatial initial rsplit. — autoplot.spatial_initial_split","text":"object spatial_initial_rsplit object. Note resamples made sf objects create spatial_initial_rsplit objects; function work resamples made non-spatial tibbles data.frames. ... Options passed ggplot2::geom_sf(). alpha Opacity, passed ggplot2::geom_sf(). Values alpha range 0 1, lower values corresponding transparent colors.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.spatial_initial_split.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a ggplot for a spatial initial rsplit. — autoplot.spatial_initial_split","text":"ggplot object fold assigned color, made using ggplot2::geom_sf().","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.spatial_initial_split.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Create a ggplot for a spatial initial rsplit. 
— autoplot.spatial_initial_split","text":"plot method wrapper around standard spatial_rsplit method, re-labels folds Testing Training following convention standard initial_split object","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.spatial_initial_split.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create a ggplot for a spatial initial rsplit. — autoplot.spatial_initial_split","text":"","code":"set.seed(123) block_initial <- spatial_initial_split(boston_canopy, prop = 1 / 5, spatial_block_cv ) autoplot(block_initial)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/blockcv2rsample.html","id":null,"dir":"Reference","previous_headings":"","what":"Convert an object created with blockCV to an rsample object — blockcv2rsample","title":"Convert an object created with blockCV to an rsample object — blockcv2rsample","text":"function creates objects created blockCV rsample objects can used tidysdm. BlockCV provides sophisticated sampling options spatialsample library. example, possible stratify sampling ensure presences absences evenly distributed among folds (see example ).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/blockcv2rsample.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Convert an object created with blockCV to an rsample object — blockcv2rsample","text":"","code":"blockcv2rsample(x, data)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/blockcv2rsample.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Convert an object created with blockCV to an rsample object — blockcv2rsample","text":"x object created blockCV function data sf object used create x","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/blockcv2rsample.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Convert an object created with blockCV to an rsample object — blockcv2rsample","text":"rsample object","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/blockcv2rsample.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Convert an object created with blockCV to an rsample object — blockcv2rsample","text":"Note currently objects type cv_spatial cv_cluster supported.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/blockcv2rsample.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Convert an object created with blockCV to an rsample object — blockcv2rsample","text":"","code":"# \\donttest{ library(blockCV) #> blockCV 3.1.4 points <- read.csv(system.file(\"extdata/\", \"species.csv\", package = \"blockCV\")) pa_data <- sf::st_as_sf(points, coords = c(\"x\", \"y\"), crs = 7845) sb1 <- cv_spatial( x = pa_data, column = \"occ\", # the response column to balance the folds k = 5, # number of folds size = 350000, # size of the blocks in metres selection = \"random\", # random blocks-to-fold iteration = 10 ) # find evenly dispersed folds #> | | | 0% | |======= | 10% | |============== | 20% | |===================== | 30% | |============================ | 40% | |=================================== | 50% | |========================================== | 60% | |================================================= | 70% | |======================================================== | 80% | 
|=============================================================== | 90% | |======================================================================| 100% #> train_0 train_1 test_0 test_1 #> 1 172 207 85 36 #> 2 218 202 39 41 #> 3 218 192 39 51 #> 4 217 171 40 72 #> 5 203 200 54 43 sb1_rsample <- blockcv2rsample(sb1, pa_data) class(sb1_rsample) #> [1] \"spatial_rset\" \"rset\" \"tbl_df\" \"tbl\" \"data.frame\" autoplot(sb1_rsample) # }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":null,"dir":"Reference","previous_headings":"","what":"Boyce continuous index (BCI) — boyce_cont","title":"Boyce continuous index (BCI) — boyce_cont","text":"function Boyce Continuous Index, measure model accuracy appropriate Species Distribution Models presence data (.e. using pseudoabsences background). algorithm used comes package enmSdm, uses multiple overlapping windows.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Boyce continuous index (BCI) — boyce_cont","text":"","code":"boyce_cont(data, ...) # S3 method for class 'data.frame' boyce_cont( data, truth, ..., estimator = NULL, na_rm = TRUE, event_level = \"first\", case_weights = NULL ) # S3 method for class 'sf' boyce_cont(data, ...) boyce_cont_vec( truth, estimate, estimator = NULL, na_rm = TRUE, event_level = \"first\", case_weights = NULL, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Boyce continuous index (BCI) — boyce_cont","text":"data Either data.frame containing columns specified truth estimate arguments, table/matrix true class results columns table. ... set unquoted column names one dplyr selector functions choose variables contain class probabilities. truth binary, 1 column selected, correspond value event_level. Otherwise, many columns factor levels truth ordering columns factor levels truth. truth column identifier true class results (factor). unquoted column name although argument passed expression supports quasiquotation (can unquote column names). _vec() functions, factor vector. estimator One \"binary\", \"hand_till\", \"macro\", \"macro_weighted\" specify type averaging done. \"binary\" relevant two class case. others general methods calculating multiclass metrics. default automatically choose \"binary\" truth binary, \"hand_till\" truth >2 levels case_weights specified, \"macro\" truth >2 levels case_weights specified (case \"hand_till\" well-defined). na_rm logical value indicating whether NA values stripped computation proceeds. event_level single string. Either \"first\" \"second\" specify level truth consider \"event\". argument applicable estimator = \"binary\". default uses internal helper generally defaults \"first\" case_weights optional column identifier case weights. unquoted column name evaluates numeric column data. _vec() functions, numeric vector. estimate truth binary, numeric vector class probabilities corresponding \"relevant\" class. Otherwise, matrix many columns factor levels truth. assumed order levels truth.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Boyce continuous index (BCI) — boyce_cont","text":"tibble columns .metric, .estimator, .estimate 1 row values. 
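[Besides the data.frame method used in the example further down, the _vec() interface takes the truth factor and the estimated probabilities directly; a minimal sketch with yardstick's two_class_example data.]

# probabilities of the first factor level (Class1) are the relevant estimates
boyce_cont_vec(
  truth = two_class_example$truth,
  estimate = two_class_example$Class1
)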
grouped data frames, number rows returned number groups.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Boyce continuous index (BCI) — boyce_cont","text":"multiclass version function, operates binary predictions (e.g. presences absences SDMs).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Boyce continuous index (BCI) — boyce_cont","text":"Boyce, M.S., P.R. Vernier, S.E. Nielsen F.K.. Schmiegelow. 2002. Evaluating resource selection functions. Ecol. Model., 157, 281-300. Hirzel, .H., G. Le Lay, V. Helfer, C. Randin . Guisan. 2006. Evaluating ability habitat suitability models predict species presences. Ecol. Model., 199, 142-152.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Boyce continuous index (BCI) — boyce_cont","text":"","code":"boyce_cont(two_class_example, truth, Class1) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 boyce_cont binary 0.805"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/calib_class_thresh.html","id":null,"dir":"Reference","previous_headings":"","what":"Calibrate class thresholds — calib_class_thresh","title":"Calibrate class thresholds — calib_class_thresh","text":"Predict new dataset using simple ensemble. Predictions individual models combined according fun","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/calib_class_thresh.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calibrate class thresholds — calib_class_thresh","text":"","code":"calib_class_thresh(object, class_thresh, metric_thresh = NULL)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/calib_class_thresh.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calibrate class thresholds — calib_class_thresh","text":"object simple_ensemble object class_thresh probability threshold used convert probabilities classes. can number (0 1), character metric (currently \"tss_max\", \"kap_max\" \"sensitivity\"). sensitivity, additional target value passed along second element vector, e.g. c(\"sensitivity\",0.8). metric_thresh vector length 2 giving metric threshold, used prune models ensemble used prediction. 'metrics' need computed workflow tuned. metric's threshold needs match value used prediction. 
Examples c(\"accuracy\",0.8) c(\"boyce_cont\",0.7).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/calib_class_thresh.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calibrate class thresholds — calib_class_thresh","text":"simple_ensemble object","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/calib_class_thresh.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calibrate class thresholds — calib_class_thresh","text":"","code":"test_ens <- simple_ensemble() %>% add_member(two_class_res[1:3, ], metric = \"roc_auc\") test_ens <- calib_class_thresh(test_ens, class_thresh = \"tss_max\") test_ens <- calib_class_thresh(test_ens, class_thresh = \"kap_max\") test_ens <- calib_class_thresh(test_ens, class_thresh = c(\"sens\", 0.9))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_coords_names.html","id":null,"dir":"Reference","previous_headings":"","what":"Check that we have a valid pair of coordinate names — check_coords_names","title":"Check that we have a valid pair of coordinate names — check_coords_names","text":"internal function checks coords (passed functions) valid set names, , NULL, standard variable names data","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_coords_names.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check that we have a valid pair of coordinate names — check_coords_names","text":"","code":"check_coords_names(data, coords)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_coords_names.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check that we have a valid pair of coordinate names — check_coords_names","text":"data data.frame containing locations. coords vector length two giving names \"x\" \"y\" coordinates, points data.frame use standard names.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_coords_names.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check that we have a valid pair of coordinate names — check_coords_names","text":"vector length 2 valid names, correct order","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_sdm_presence.html","id":null,"dir":"Reference","previous_headings":"","what":"Check that the column with presences is correctly formatted — check_sdm_presence","title":"Check that the column with presences is correctly formatted — check_sdm_presence","text":"tidysdm, string defining presences first level response factor. 
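[A minimal sketch of this check, using the lacerta dataset shipped with the package, as in the check_splits_balance example further down.]

lacerta_thin <- readRDS(system.file("extdata/lacerta_climate_sf.RDS",
  package = "tidysdm"
))
# returns TRUE if "presence" is the first level of the class factor
check_sdm_presence(lacerta_thin, class, presence_level = "presence")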
function checks column correctly formatted.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_sdm_presence.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check that the column with presences is correctly formatted — check_sdm_presence","text":"","code":"check_sdm_presence(.data, .col, presence_level = \"presence\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_sdm_presence.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check that the column with presences is correctly formatted — check_sdm_presence","text":".data data.frame tibble, derived object sf data.frame .col column containing presences presence_level string used define presence level .col","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_sdm_presence.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check that the column with presences is correctly formatted — check_sdm_presence","text":"TRUE correctly formatted","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_splits_balance.html","id":null,"dir":"Reference","previous_headings":"","what":"Check the balance of presences vs pseudoabsences among splits — check_splits_balance","title":"Check the balance of presences vs pseudoabsences among splits — check_splits_balance","text":"Check balance presences vs pseudoabsences among splits","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_splits_balance.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check the balance of presences vs pseudoabsences among splits — check_splits_balance","text":"","code":"check_splits_balance(splits, .col)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_splits_balance.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check the balance of presences vs pseudoabsences among splits — check_splits_balance","text":"splits data splits (rset split object), generated function spatialsample::spatial_block_cv() .col column containing presences","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_splits_balance.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check the balance of presences vs pseudoabsences among splits — check_splits_balance","text":"tibble number presences pseudoabsences assessment analysis set split (training testing initial split)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_splits_balance.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Check the balance of presences vs pseudoabsences among splits — check_splits_balance","text":"","code":"lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5) check_splits_balance(lacerta_cv, class) #> # A tibble: 5 × 4 #> presence_assessment pseudoabs_assessment presence_analysis pseudoabs_analysis #> #> 1 80 273 33 66 #> 2 80 283 33 56 #> 3 97 272 16 67 #> 4 94 262 19 77 #> 5 101 267 12 72"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/clamp_predictors.html","id":null,"dir":"Reference","previous_headings":"","what":"Clamp the predictors to match values in training set — clamp_predictors","title":"Clamp the predictors to match values in training set — 
clamp_predictors","text":"function clamps environmental variables terra::SpatRaster terra::SpatRasterDataset minimum maximum values exceed range training dataset.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/clamp_predictors.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Clamp the predictors to match values in training set — clamp_predictors","text":"","code":"clamp_predictors(x, training, .col, use_na) # Default S3 method clamp_predictors(x, training, .col, use_na) # S3 method for class 'SpatRaster' clamp_predictors(x, training, .col, use_na = FALSE) # S3 method for class 'SpatRasterDataset' clamp_predictors(x, training, .col, use_na = FALSE)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/clamp_predictors.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Clamp the predictors to match values in training set — clamp_predictors","text":"x terra::SpatRaster terra::SpatRasterDataset clamp. training training dataset (data.frame sf::sf object. .col column containing presences (optional). specified, excluded clamping. use_na boolean determining whether values outside range training dataset removed (set NA). FALSE (default), values outside training range replaced extremes training range.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/clamp_predictors.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Clamp the predictors to match values in training set — clamp_predictors","text":"terra::SpatRaster terra::SpatRasterDataset clamped ranges training","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/collect_metrics.simple_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","title":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","text":"Return tibble performance metrics models.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/collect_metrics.simple_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","text":"","code":"# S3 method for class 'simple_ensemble' collect_metrics(x, ...) # S3 method for class 'repeat_ensemble' collect_metrics(x, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/collect_metrics.simple_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","text":"x simple_ensemble repeat_ensemble object ... 
currently used.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/collect_metrics.simple_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","text":"tibble.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/collect_metrics.simple_ensemble.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","text":"applied ensemble, metrics returned contain actual tuning parameter columns values (unlike collect functions run objects). reason ensembles contain different types models models different tuning parameters.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/collect_metrics.simple_ensemble.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","text":"","code":"collect_metrics(lacerta_ensemble) #> # A tibble: 12 × 5 #> wflow_id .metric mean std_err n #> #> 1 default_glm boyce_cont 0.683 0.0573 3 #> 2 default_glm roc_auc 0.789 0.0137 3 #> 3 default_glm tss_max 0.533 0.0192 3 #> 4 default_rf boyce_cont 0.632 0.0552 3 #> 5 default_rf roc_auc 0.791 0.0146 3 #> 6 default_rf tss_max 0.519 0.0284 3 #> 7 default_gbm boyce_cont 0.738 0.0473 3 #> 8 default_gbm roc_auc 0.792 0.00304 3 #> 9 default_gbm tss_max 0.545 0.0368 3 #> 10 default_maxent boyce_cont 0.832 0.0771 3 #> 11 default_maxent roc_auc 0.832 0.00649 3 #> 12 default_maxent tss_max 0.595 0.0124 3 collect_metrics(lacerta_rep_ens) #> # A tibble: 18 × 6 #> rep_id wflow_id .metric mean std_err n #> #> 1 rep_01 default_glm boyce_cont 0.796 0.0241 5 #> 2 rep_01 default_glm roc_auc 0.974 0.00385 5 #> 3 rep_01 default_glm tss_max 0.881 0.0155 5 #> 4 rep_01 default_maxent boyce_cont 0.834 0.0603 5 #> 5 rep_01 default_maxent roc_auc 0.981 0.0129 5 #> 6 rep_01 default_maxent tss_max 0.914 0.0345 5 #> 7 rep_02 default_glm boyce_cont 0.716 0.0520 5 #> 8 rep_02 default_glm roc_auc 0.940 0.0268 5 #> 9 rep_02 default_glm tss_max 0.813 0.0543 5 #> 10 rep_02 default_maxent boyce_cont 0.808 0.0745 5 #> 11 rep_02 default_maxent roc_auc 0.982 0.00807 5 #> 12 rep_02 default_maxent tss_max 0.883 0.0270 5 #> 13 rep_03 default_glm boyce_cont 0.402 0.179 5 #> 14 rep_03 default_glm roc_auc 0.940 0.0202 5 #> 15 rep_03 default_glm tss_max 0.821 0.0330 5 #> 16 rep_03 default_maxent boyce_cont 0.908 0.0312 5 #> 17 rep_03 default_maxent roc_auc 0.976 0.00727 5 #> 18 rep_03 default_maxent tss_max 0.854 0.0345 5"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/conf_matrix_df.html","id":null,"dir":"Reference","previous_headings":"","what":"Make a confusion matrix dataframe for multiple thresholds — conf_matrix_df","title":"Make a confusion matrix dataframe for multiple thresholds — conf_matrix_df","text":"Create confusion matrix multiple thresholds, using optimise tss","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/conf_matrix_df.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Make a confusion matrix dataframe for multiple thresholds — conf_matrix_df","text":"","code":"conf_matrix_df(presences, 
absences)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/conf_matrix_df.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Make a confusion matrix dataframe for multiple thresholds — conf_matrix_df","text":"presences Probabilities presences absences probabilities absences","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/conf_matrix_df.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Make a confusion matrix dataframe for multiple thresholds — conf_matrix_df","text":"data.frame thresholds columns thres, tp, fp, fn, tn","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/control_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Control wrappers — control_ensemble_grid","title":"Control wrappers — control_ensemble_grid","text":"Supply light wrappers control argument tune::tune_grid(), tune::tune_bayes(), tune::fit_resamples() call return needed elements use ensemble. functions return appropriate control grid ensure assessment set predictions information model specifications preprocessors, supplied resampling results object! integrate ensemble settings existing control settings, note functions just call appropriate tune::control_* function arguments save_pred = TRUE, save_workflow = TRUE. wrappers equivalent ones used stacks package.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/control_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Control wrappers — control_ensemble_grid","text":"","code":"control_ensemble_grid() control_ensemble_resamples() control_ensemble_bayes()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/control_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Control wrappers — control_ensemble_grid","text":"tune::control_grid, tune::control_bayes, tune::control_resamples object.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/dist_pres_vs_bg.html","id":null,"dir":"Reference","previous_headings":"","what":"Distance between the distribution of climate values for presences vs background — dist_pres_vs_bg","title":"Distance between the distribution of climate values for presences vs background — dist_pres_vs_bg","text":"environmental variable, function computes density functions presences absences returns (1-overlap), measure distance two distributions. 
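[Stepping back to the control wrappers above: a minimal sketch of slotting them into a workflow_set before tuning, mirroring the overview article, where lacerta_models is the workflow_set built there.]

# store assessment-set predictions and workflows, as ensembles require
lacerta_models <- lacerta_models %>%
  option_add(control = control_ensemble_grid())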
Variables high distance good candidates SDMs, species occurrences confined subset available background.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/dist_pres_vs_bg.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Distance between the distribution of climate values for presences vs background — dist_pres_vs_bg","text":"","code":"dist_pres_vs_bg(.data, .col)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/dist_pres_vs_bg.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Distance between the distribution of climate values for presences vs background — dist_pres_vs_bg","text":".data data.frame (derived object, tibble, sf) values bioclimate variables presences background .col column containing presences; assumes presences first level factor","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/dist_pres_vs_bg.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Distance between the distribution of climate values for presences vs background — dist_pres_vs_bg","text":"name vector distances","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/dist_pres_vs_bg.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Distance between the distribution of climate values for presences vs background — dist_pres_vs_bg","text":"","code":"# This should be updated to use a dataset from tidysdm data(\"bradypus\", package = \"maxnet\") bradypus_tb <- tibble::as_tibble(bradypus) %>% dplyr::mutate(presence = relevel( factor( dplyr::case_match(presence, 1 ~ \"presence\", 0 ~ \"absence\") ), ref = \"presence\" )) %>% select(-ecoreg) bradypus_tb %>% dist_pres_vs_bg(presence) #> pre6190_l10 frs6190_ann tmn6190_ann pre6190_ann vap6190_ann pre6190_l7 #> 0.4366602 0.4299480 0.4295013 0.4096230 0.3945855 0.3933454 #> h_dem tmp6190_ann dtr6190_ann pre6190_l4 tmx6190_ann cld6190_ann #> 0.3647375 0.3316686 0.3288771 0.2544976 0.2418274 0.1812527 #> pre6190_l1 #> 0.1297035"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/explain_tidysdm.html","id":null,"dir":"Reference","previous_headings":"","what":"Create explainer from your tidysdm ensembles. — explain_tidysdm","title":"Create explainer from your tidysdm ensembles. — explain_tidysdm","text":"DALEX designed explore explain behaviour Machine Learning methods. function creates DALEX explainer (see DALEX::explain()), can queried multiple function create explanations model.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/explain_tidysdm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create explainer from your tidysdm ensembles. 
— explain_tidysdm","text":"","code":"explain_tidysdm( model, data, y, predict_function, predict_function_target_column, residual_function, ..., label, verbose, precalculate, colorize, model_info, type, by_workflow ) # Default S3 method explain_tidysdm( model, data = NULL, y = NULL, predict_function = NULL, predict_function_target_column = NULL, residual_function = NULL, ..., label = NULL, verbose = TRUE, precalculate = TRUE, colorize = !isTRUE(getOption(\"knitr.in.progress\")), model_info = NULL, type = \"classification\", by_workflow = FALSE ) # S3 method for class 'simple_ensemble' explain_tidysdm( model, data = NULL, y = NULL, predict_function = NULL, predict_function_target_column = NULL, residual_function = NULL, ..., label = NULL, verbose = TRUE, precalculate = TRUE, colorize = !isTRUE(getOption(\"knitr.in.progress\")), model_info = NULL, type = \"classification\", by_workflow = FALSE ) # S3 method for class 'repeat_ensemble' explain_tidysdm( model, data = NULL, y = NULL, predict_function = NULL, predict_function_target_column = NULL, residual_function = NULL, ..., label = NULL, verbose = TRUE, precalculate = TRUE, colorize = !isTRUE(getOption(\"knitr.in.progress\")), model_info = NULL, type = \"classification\", by_workflow = FALSE )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/explain_tidysdm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create explainer from your tidysdm ensembles. — explain_tidysdm","text":"model object - model explained data data.frame matrix - data used calculate explanations. provided, extracted model. Data passed without target column (shall provided y argument). NOTE: target variable present data, functionalities may work properly. y numeric vector outputs/scores. provided, shall size data predict_function function takes two arguments: model new data returns numeric vector predictions. default yhat. predict_function_target_column Character numeric containing either column name column number model prediction object class considered positive (.e. class associated probability 1). NULL, second column output taken binary classification. multiclass classification setting, parameter cause switch binary classification mode one vs others probabilities. residual_function function takes four arguments: model, data, target vector y predict function (optionally). return numeric vector model residuals given data. provided, response residuals (\\(y-\\hat{y}\\)) calculated. default residual_function_default. ... parameters label character - name model. default extracted 'class' attribute model verbose logical. TRUE (default) diagnostic messages printed precalculate logical. TRUE (default) predicted_values residual calculated explainer created. happen also verbose TRUE. Set verbose precalculate FALSE omit calculations. colorize logical. TRUE (default) WARNINGS, ERRORS NOTES colorized. work R console. Now default FALSE knitting TRUE otherwise. model_info named list (package, version, type) containing information model. NULL, DALEX seek information . type type model, either classification regression. specified type extracted model_info. by_workflow boolean determining whether list explainer, one per model, returned instead single explainer ensemble","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/explain_tidysdm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create explainer from your tidysdm ensembles. 
— explain_tidysdm","text":"explainer object DALEX::explain ready work DALEX","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/explain_tidysdm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create explainer from your tidysdm ensembles. — explain_tidysdm","text":"","code":"# \\donttest{ # using the whole ensemble lacerta_explainer <- explain_tidysdm(tidysdm::lacerta_ensemble) #> Preparation of a new explainer is initiated #> -> model label : data.frame ( default ) #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : predict_function #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidysdm , ver. 0.9.6.9002 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.01490969 , mean = 0.2861937 , max = 0.7169324 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.6465921 , mean = -0.03619367 , max = 0.7891973 #> A new explainer has been created! # by workflow explainer_list <- explain_tidysdm(tidysdm::lacerta_ensemble, by_workflow = TRUE ) #> Preparation of a new explainer is initiated #> -> model label : default_glm #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.2280177 , mean = 0.75 , max = 0.9854359 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.9096205 , mean = 5.395921e-12 , max = 0.7719823 #> A new explainer has been created! #> Preparation of a new explainer is initiated #> -> model label : default_rf #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.1315421 , mean = 0.7480648 , max = 1 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.6878921 , mean = 0.001935171 , max = 0.5870619 #> A new explainer has been created! #> Preparation of a new explainer is initiated #> -> model label : default_gbm #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.3390188 , mean = 0.7314788 , max = 0.9632964 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.9268645 , mean = 0.01852121 , max = 0.6280424 #> A new explainer has been created! 
#> Preparation of a new explainer is initiated #> -> model label : default_maxent #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.1095764 , mean = 0.6256817 , max = 0.9960248 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.8207859 , mean = 0.1243183 , max = 0.8904236 #> A new explainer has been created! # }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":null,"dir":"Reference","previous_headings":"","what":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"Compute multivariate environmental similarity surfaces (MESS), described Elith et al., 2010.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"","code":"extrapol_mess(x, training, .col, ...) # Default S3 method extrapol_mess(x, training, ...) # S3 method for class 'SpatRaster' extrapol_mess(x, training, .col, filename = \"\", ...) # S3 method for class 'data.frame' extrapol_mess(x, training, .col, ...) # S3 method for class 'SpatRasterDataset' extrapol_mess(x, training, .col, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"x terra::SpatRaster, terra::SpatRasterDataset data.frame training matrix data.frame sf object containing reference values; column correspond one layer terra::SpatRaster object, exception presences column defined .col (optional). .col column containing presences (optional). specified, excluded computing MESS scores. ... additional arguments terra::writeRaster() filename character. Output filename (optional)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"terra::SpatRaster (data.frame) MESS values.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"function modified version mess package predicts, method added work terra::SpatRasterDataset. Note method terra::SpatRasterDataset assumes variables stored terra::SpatRaster time information within x. Time also assumed years. 
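[A minimal sketch of the SpatRaster method; climate_present and lacerta_thin are hypothetical stand-ins for a predictor raster and the matching training sf object.]

# negative MESS scores flag cells outside the training range
mess <- extrapol_mess(climate_present,
  training = lacerta_thin, .col = class
)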
conditions met, possible manually extract terra::SpatRaster time step, use extrapol_mess terra::SpatRasters","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"Elith J., M. Kearney M., S. Phillips, 2010. art modelling range-shifting species. Methods Ecology Evolution 1:330-342.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"Jean-Pierre Rossi, Robert Hijmans, Paulo van Breugel, Andrea Manica","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_collinear.html","id":null,"dir":"Reference","previous_headings":"","what":"Filter to retain only variables that have low collinearity — filter_collinear","title":"Filter to retain only variables that have low collinearity — filter_collinear","text":"method finds subset variables low collinearity. provides three methods: cor_caret, stepwise approach remove variables pairwise correlation given cutoff, choosing variable greatest mean correlation (based algorithm caret::findCorrelation); vif_step, stepwise approach remove variables variance inflation factor given cutoff (based algorithm usdm::vifstep), vif_cor, stepwise approach , step, find pair variables highest correlation cutoff removes one largest vif. correlation certain cutoff. methods terra::SpatRaster, data.frame matrix. terra::SpatRaster data.frame, numeric variables considered.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_collinear.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filter to retain only variables that have low collinearity — filter_collinear","text":"","code":"filter_collinear( x, cutoff = NULL, verbose = FALSE, names = TRUE, to_keep = NULL, method = \"cor_caret\", cor_type = \"pearson\", max_cells = Inf, ... ) # Default S3 method filter_collinear( x, cutoff = NULL, verbose = FALSE, names = TRUE, to_keep = NULL, method = \"cor_caret\", cor_type = \"pearson\", max_cells = Inf, ... ) # S3 method for class 'SpatRaster' filter_collinear( x, cutoff = NULL, verbose = FALSE, names = TRUE, to_keep = NULL, method = \"cor_caret\", cor_type = \"pearson\", max_cells = Inf, exhaustive = FALSE, ... ) # S3 method for class 'data.frame' filter_collinear( x, cutoff = NULL, verbose = FALSE, names = TRUE, to_keep = NULL, method = \"cor_caret\", cor_type = \"pearson\", max_cells = Inf, ... ) # S3 method for class 'matrix' filter_collinear( x, cutoff = NULL, verbose = FALSE, names = TRUE, to_keep = NULL, method = \"cor_caret\", cor_type = \"pearson\", max_cells = Inf, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_collinear.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filter to retain only variables that have low collinearity — filter_collinear","text":"x terra::SpatRaster object, data.frame (numeric variables) cutoff numeric value used threshold remove variables. , \"cor_caret\" \"vif_cor\", pair-wise absolute correlation cutoff, defaults 0.7. 
\"vif_step\", variable inflation factor, defaults 10 verbose boolean whether additional information provided screen names logical; column names returned TRUE column index FALSE)? to_keep vector variable names want force set (note function return error correlation among variables higher cutoff). method character. One \"cor_caret\", \"vif_cor\" \"vif_step\". cor_type character. methods use correlation, type correlation: \"pearson\", \"kendall\", \"spearman\". Defaults \"pearson\" max_cells positive integer. maximum number cells used. smaller ncell(x), regular sample x used ... additional arguments specific given object type exhaustive boolean. Used terra::SpatRaster downsampling max_cells, require exhaustive approach terra::spatSample(). needed rasters sparse large, see help page terra::spatSample() details.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_collinear.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Filter to retain only variables that have low collinearity — filter_collinear","text":"vector names columns correlation threshold (names = TRUE), otherwise vector indices. Note indices numeric variables (.e. factors present, indices take account).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_collinear.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Filter to retain only variables that have low collinearity — filter_collinear","text":"Naimi, B., Hamm, N..S., Groen, T.., Skidmore, .K., Toxopeus, .G. 2014. positional uncertainty problem species distribution modelling?, Ecography 37 (2): 191-203.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_collinear.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Filter to retain only variables that have low collinearity — filter_collinear","text":"cor_caret: Original R code Dong Li, modified Max Kuhn Andrea Manica; vif_step vif_cor, original algorithm Babak Naimi, rewritten Andrea Manica tidysdm","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_high_cor.html","id":null,"dir":"Reference","previous_headings":"","what":"Deprecated: Filter to retain only variables below a given correlation threshold — filter_high_cor","title":"Deprecated: Filter to retain only variables below a given correlation threshold — filter_high_cor","text":"FUNCTION DEPRECATED. 
USE filter_collinear method=cor_caret instead","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_high_cor.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Deprecated: Filter to retain only variables below a given correlation threshold — filter_high_cor","text":"","code":"filter_high_cor(x, cutoff = 0.7, verbose = FALSE, names = TRUE, to_keep = NULL) # Default S3 method filter_high_cor(x, cutoff = 0.7, verbose = FALSE, names = TRUE, to_keep = NULL) # S3 method for class 'SpatRaster' filter_high_cor(x, cutoff = 0.7, verbose = FALSE, names = TRUE, to_keep = NULL) # S3 method for class 'data.frame' filter_high_cor(x, cutoff = 0.7, verbose = FALSE, names = TRUE, to_keep = NULL) # S3 method for class 'matrix' filter_high_cor(x, cutoff = 0.7, verbose = FALSE, names = TRUE, to_keep = NULL)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_high_cor.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Deprecated: Filter to retain only variables below a given correlation threshold — filter_high_cor","text":"x terra::SpatRaster object, data.frame (numeric variables), correlation matrix cutoff numeric value pair-wise absolute correlation cutoff verbose boolean printing details names logical; column names returned TRUE column index FALSE)? to_keep vector variable names want force set (note function return error correlation among variables higher cutoff).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_high_cor.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Deprecated: Filter to retain only variables below a given correlation threshold — filter_high_cor","text":"vector names columns correlation threshold (names = TRUE), otherwise vector indices. Note indices numeric variables (.e. factors present, indices take account).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_high_cor.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Deprecated: Filter to retain only variables below a given correlation threshold — filter_high_cor","text":"method finds subset variable correlation certain cutoff. methods terra::SpatRaster, data.frame, work directly correlation matrix previously estimated. data.frame, numeric variables considered. algorithm based caret::findCorrelation, using exact option. absolute values pair-wise correlations considered. two variables high correlation, function looks mean absolute correlation variable removes variable largest mean absolute correlation. 
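[As this function is deprecated, a minimal sketch of the equivalent call through its replacement; climate_present is a hypothetical SpatRaster of candidate predictors.]

# equivalent of filter_high_cor(x, cutoff = 0.7) via the new interface
vars_to_keep <- filter_collinear(climate_present,
  cutoff = 0.7, method = "cor_caret"
)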
several function package subselect can also used accomplish goal tend retain predictors.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/form_resp.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the response variable from a formula — form_resp","title":"Get the response variable from a formula — form_resp","text":"counterpart rsample::form_pred.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/form_resp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the response variable from a formula — form_resp","text":"","code":"form_resp(x)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/form_resp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get the response variable from a formula — form_resp","text":"x formula","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/form_resp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get the response variable from a formula — form_resp","text":"character name response","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/form_resp.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Get the response variable from a formula — form_resp","text":"Note: might behave well functions log(y). neither form_pred modified https://stackoverflow.com/questions/13217322/--reliably-get-dependent-variable-name--formula-object","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/gam_formula.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a formula for gam — gam_formula","title":"Create a formula for gam — gam_formula","text":"function takes formula recipe, turns numeric predictors smooths given k. formula can passed workflow workflow set fitting gam.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/gam_formula.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a formula for gam — gam_formula","text":"","code":"gam_formula(object, k = 10)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/gam_formula.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a formula for gam — gam_formula","text":"object recipes::recipe, already trained k k value smooth","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/gam_formula.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a formula for gam — gam_formula","text":"formula","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/geom_split_violin.html","id":null,"dir":"Reference","previous_headings":"","what":"Split violin geometry for ggplots — geom_split_violin","title":"Split violin geometry for ggplots — geom_split_violin","text":"geometry displays density distribution two groups side side, two halves violin. 
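[Returning to gam_formula() above: a minimal sketch of its typical use in a workflow_set, as in the overview article; lacerta_rec is assumed to be an already trained recipe.]

# turn numeric predictors into s() smooths for the gam specification
lacerta_models <- lacerta_models %>%
  update_workflow_model("default_gam",
    spec = sdm_spec_gam(),
    formula = gam_formula(lacerta_rec)
  )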
Note empty x aesthetic provided even want plot single variable (see example).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/geom_split_violin.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Split violin geometry for ggplots — geom_split_violin","text":"","code":"geom_split_violin( mapping = NULL, data = NULL, stat = \"ydensity\", position = \"identity\", nudge = 0, ..., draw_quantiles = NULL, trim = TRUE, scale = \"area\", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/geom_split_violin.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Split violin geometry for ggplots — geom_split_violin","text":"mapping Set aesthetic mappings created aes(). specified inherit.aes = TRUE (default), combined default mapping top level plot. must supply mapping plot mapping. data data displayed layer. three options: NULL, default, data inherited plot data specified call ggplot(). data.frame, object, override plot data. objects fortified produce data frame. See fortify() variables created. function called single argument, plot data. return value must data.frame, used layer data. function can created formula (e.g. ~ head(.x, 10)). stat Use override default connection ggplot2::geom_violin() ggplot2::stat_ydensity(). position position adjustment use data layer. can used various ways, including prevent overplotting improving display. position argument accepts following: result calling position function, position_jitter(). method allows passing extra arguments position. string naming position adjustment. give position string, strip function name position_ prefix. example, use position_jitter(), give position \"jitter\". information ways specify position, see layer position documentation. nudge Add space half-violin middle space allotted given factor x-axis. ... arguments passed layer()'s params argument. arguments broadly fall one 4 categories. Notably, arguments position argument, aesthetics required can passed .... Unknown arguments part 4 categories ignored. Static aesthetics mapped scale, fixed value apply layer whole. example, colour = \"red\" linewidth = 3. geom's documentation Aesthetics section lists available options. 'required' aesthetics passed params. Please note passing unmapped aesthetics vectors technically possible, order required length guaranteed parallel input data. constructing layer using stat_*() function, ... argument can used pass parameters geom part layer. example stat_density(geom = \"area\", outline.type = \"\"). geom's documentation lists parameters can accept. Inversely, constructing layer using geom_*() function, ... argument can used pass parameters stat part layer. example geom_area(stat = \"density\", adjust = 0.5). stat's documentation lists parameters can accept. key_glyph argument layer() may also passed .... can one functions described key glyphs, change display layer legend. draw_quantiles (not NULL) (default), draw horizontal lines given quantiles density estimate. trim TRUE (default), trim tails violins range data. FALSE, don't trim tails. scale \"area\" (default), violins area (trimming tails). \"count\", areas scaled proportionally number observations. \"width\", violins maximum width. na.rm FALSE, default, missing values removed warning. TRUE, missing values silently removed. show.legend logical. layer included legends? NA, default, includes aesthetics mapped.
FALSE never includes, TRUE always includes. can also named logical vector finely select aesthetics display. inherit.aes FALSE, overrides default aesthetics, rather combining. useful helper functions define data aesthetics inherit behaviour default plot specification, e.g. borders().","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/geom_split_violin.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Split violin geometry for ggplots — geom_split_violin","text":"ggplot2::layer object","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/geom_split_violin.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Split violin geometry for ggplots — geom_split_violin","text":"implementation based https://stackoverflow.com/questions/35717353/split-violin-plot-with-ggplot2. Credit goes @jan-jlx providing complete implementation StackOverflow, Trang Q. Nguyen adding nudge parameter.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/geom_split_violin.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Split violin geometry for ggplots — geom_split_violin","text":"","code":"data(\"bradypus\", package = \"maxnet\") bradypus_tb <- tibble::as_tibble(bradypus) %>% dplyr::mutate(presence = relevel( factor( dplyr::case_match(presence, 1 ~ \"presence\", 0 ~ \"absence\") ), ref = \"presence\" )) ggplot(bradypus_tb, aes( x = \"\", y = cld6190_ann, fill = presence )) + geom_split_violin(nudge = 0.01)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_cellsize.html","id":null,"dir":"Reference","previous_headings":"","what":"Get default grid cellsize for a given dataset — grid_cellsize","title":"Get default grid cellsize for a given dataset — grid_cellsize","text":"function facilitates using spatialsample::spatial_block_cv multiple times analysis. spatialsample::spatial_block_cv creates grid based object data. However, spatial blocks generated multiple times analysis (e.g. spatial_initial_split(), subsequently cross-validation training dataset), might desirable keep grid. applying function largest dataset, usually full dataset spatial_initial_split().
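For example, a hedged sketch of that workflow (full_dataset and training_data are hypothetical sf objects, and it assumes spatialsample::spatial_block_cv() forwards cellsize and offset through ... to sf::st_make_grid()):

# compute the grid once on the largest dataset, then reuse it for every split
cell <- grid_cellsize(full_dataset)
offs <- grid_offset(full_dataset)
folds <- spatialsample::spatial_block_cv(training_data, v = 5, cellsize = cell, offset = offs)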
resulting cellsize can used option spatialsample::spatial_block_cv.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_cellsize.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get default grid cellsize for a given dataset — grid_cellsize","text":"","code":"grid_cellsize(data, n = c(10, 10))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_cellsize.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get default grid cellsize for a given dataset — grid_cellsize","text":"data sf::sf dataset used size grid n number cells grid, defaults c(10,10), also default sf::st_make_grid()","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_cellsize.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get default grid cellsize for a given dataset — grid_cellsize","text":"cell size","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_offset.html","id":null,"dir":"Reference","previous_headings":"","what":"Get default grid offset for a given dataset — grid_offset","title":"Get default grid offset for a given dataset — grid_offset","text":"function facilitates using spatialsample::spatial_block_cv multiple times analysis. spatialsample::spatial_block_cv creates grid based object data. However, spatial blocks generated multiple times analysis (e.g. spatial_initial_split(), subsequently cross-validation training dataset), might desirable keep grid. applying function largest dataset, usually full dataset spatial_initial_split(). resulting offset can used option spatialsample::spatial_block_cv.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_offset.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get default grid offset for a given dataset — grid_offset","text":"","code":"grid_offset(data)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_offset.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get default grid offset for a given dataset — grid_offset","text":"data sf::sf dataset used size grid","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_offset.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get default grid offset for a given dataset — grid_offset","text":"grid offset","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/horses.html","id":null,"dir":"Reference","previous_headings":"","what":"Coordinates of radiocarbon dates for horses — horses","title":"Coordinates of radiocarbon dates for horses — horses","text":"Coordinates presences horses 22k 8k YBP.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/horses.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Coordinates of radiocarbon dates for horses — horses","text":"","code":"horses"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/horses.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Coordinates of radiocarbon dates for horses — horses","text":"tibble 1,297 rows 3 variables: latitude latitudes degrees longitude longitudes degrees time_bp time years
present","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Maximum Cohen's Kappa — kap_max","title":"Maximum Cohen's Kappa — kap_max","text":"Cohen's Kappa (yardstick::kap()) measure similar yardstick::accuracy(), normalises observed accuracy value expected chance (helps unbalanced cases one class predominant).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Maximum Cohen's Kappa — kap_max","text":"","code":"kap_max(data, ...) # S3 method for class 'data.frame' kap_max( data, truth, ..., estimator = NULL, na_rm = TRUE, event_level = \"first\", case_weights = NULL ) # S3 method for class 'sf' kap_max(data, ...) kap_max_vec( truth, estimate, estimator = NULL, na_rm = TRUE, event_level = \"first\", case_weights = NULL, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Maximum Cohen's Kappa — kap_max","text":"data Either data.frame containing columns specified truth estimate arguments, table/matrix true class results columns table. ... set unquoted column names one dplyr selector functions choose variables contain class probabilities. truth binary, 1 column selected, correspond value event_level. Otherwise, many columns factor levels truth ordering columns factor levels truth. truth column identifier true class results (factor). unquoted column name although argument passed expression supports quasiquotation (can unquote column names). _vec() functions, factor vector. estimator One \"binary\", \"hand_till\", \"macro\", \"macro_weighted\" specify type averaging done. \"binary\" relevant two class case. others general methods calculating multiclass metrics. default automatically choose \"binary\" truth binary, \"hand_till\" truth >2 levels case_weights specified, \"macro\" truth >2 levels case_weights specified (case \"hand_till\" well-defined). na_rm logical value indicating whether NA values stripped computation proceeds. event_level single string. Either \"first\" \"second\" specify level truth consider \"event\". argument applicable estimator = \"binary\". default uses internal helper generally defaults \"first\" case_weights optional column identifier case weights. unquoted column name evaluates numeric column data. _vec() functions, numeric vector. estimate truth binary, numeric vector class probabilities corresponding \"relevant\" class. Otherwise, matrix many columns factor levels truth. assumed order levels truth.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Maximum Cohen's Kappa — kap_max","text":"tibble columns .metric, .estimator, .estimate 1 row values. grouped data frames, number rows returned number groups.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Maximum Cohen's Kappa — kap_max","text":"function calibrates probability threshold classify presences maximises kappa. multiclass version function, operates binary predictions (e.g. 
presences absences SDMs).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Maximum Cohen's Kappa — kap_max","text":"Cohen, J. (1960). \"coefficient agreement nominal scales\". Educational Psychological Measurement. 20 (1): 37-46. Cohen, J. (1968). \"Weighted kappa: Nominal scale agreement provision scaled disagreement partial credit\". Psychological Bulletin. 70 (4): 213-220.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Maximum Cohen's Kappa — kap_max","text":"","code":"kap_max(two_class_example, truth, Class1) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 kap_max binary 0.725"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/km2m.html","id":null,"dir":"Reference","previous_headings":"","what":"Convert a geographic distance from km to m — km2m","title":"Convert a geographic distance from km to m — km2m","text":"function takes distance km converts meters, units generally used geographic operations R. trivial conversion, function ensures zeroes not lost along way!","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/km2m.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Convert a geographic distance from km to m — km2m","text":"","code":"km2m(x)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/km2m.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Convert a geographic distance from km to m — km2m","text":"x number km","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/km2m.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Convert a geographic distance from km to m — km2m","text":"number meters","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/km2m.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Convert a geographic distance from km to m — km2m","text":"","code":"km2m(10000) #> [1] 1e+07 km2m(1) #> [1] 1000"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta.html","id":null,"dir":"Reference","previous_headings":"","what":"Coordinates of presences for Iberian emerald lizard — lacerta","title":"Coordinates of presences for Iberian emerald lizard — lacerta","text":"Coordinates presences Lacerta schreiberi.
variables follows:","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Coordinates of presences for Iberian emerald lizard — lacerta","text":"","code":"lacerta"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Coordinates of presences for Iberian emerald lizard — lacerta","text":"tibble 1,297 rows 3 variables: ID ids GBIF latitude latitudes degrees longitude longitudes degrees","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"A simple ensemble for the lacerta data — lacerta_ensemble","title":"A simple ensemble for the lacerta data — lacerta_ensemble","text":"Ensemble SDM Lacerta schreiberi, generated vignette.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A simple ensemble for the lacerta data — lacerta_ensemble","text":"","code":"lacerta_ensemble"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta_ensemble.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A simple ensemble for the lacerta data — lacerta_ensemble","text":"simple_ensemble object","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta_rep_ens.html","id":null,"dir":"Reference","previous_headings":"","what":"A repeat ensemble for the lacerta data — lacerta_rep_ens","title":"A repeat ensemble for the lacerta data — lacerta_rep_ens","text":"Ensemble SDM Lacerta schreiberi, generated vignette.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta_rep_ens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A repeat ensemble for the lacerta data — lacerta_rep_ens","text":"","code":"lacerta_rep_ens"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta_rep_ens.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A repeat ensemble for the lacerta data — lacerta_rep_ens","text":"repeat_ensemble object","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacertidae_background.html","id":null,"dir":"Reference","previous_headings":"","what":"Coordinates of presences for lacertidae in the Iberian peninsula — lacertidae_background","title":"Coordinates of presences for lacertidae in the Iberian peninsula — lacertidae_background","text":"Coordinates presences lacertidae, used background lacerta dataset.. 
variables follows:","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacertidae_background.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Coordinates of presences for lacertidae in the Iberian peninsula — lacertidae_background","text":"","code":"lacertidae_background"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacertidae_background.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Coordinates of presences for lacertidae in the Iberian peninsula — lacertidae_background","text":"tibble 1,297 rows 3 variables: ID ids GBIF latitude latitudes degrees longitude longitudes degrees","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/make_mask_from_presence.html","id":null,"dir":"Reference","previous_headings":"","what":"Make a mask from presence data — make_mask_from_presence","title":"Make a mask from presence data — make_mask_from_presence","text":"functions uses presence column create mask apply raster define area interest. Two methods available: one uses buffer around presence, one create convex hull around presences (possibility adding buffer around hull).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/make_mask_from_presence.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Make a mask from presence data — make_mask_from_presence","text":"","code":"make_mask_from_presence(data, method = \"buffer\", buffer = 0, return_sf = FALSE)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/make_mask_from_presence.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Make a mask from presence data — make_mask_from_presence","text":"data sf::sf data frame presences.. method method use create mask. 
Either 'buffer' 'convex_hull' buffer buffer add around presence (units crs data; lat/lon, buffer meters), around convex hull (method 'convex_hull') return_sf whether return mask sf object (TRUE) terra::SpatVector object (FALSE, default)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/make_mask_from_presence.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Make a mask from presence data — make_mask_from_presence","text":"terra::SpatVector sf object (depending value return_sf) mask","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/make_mask_from_presence.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Make a mask from presence data — make_mask_from_presence","text":"use terra::mask() raster, use return_sf = FALSE get terra::SpatVector object can used masking.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/make_mask_from_presence.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Make a mask from presence data — make_mask_from_presence","text":"","code":"lacerta_sf <- lacerta %>% sf::st_as_sf(coords = c(\"longitude\", \"latitude\")) %>% sf::st_set_crs(4326) land_mask <- terra::readRDS(system.file(\"extdata/lacerta_land_mask.rds\", package = \"tidysdm\")) mask_buffer <- make_mask_from_presence(lacerta_sf, method = \"buffer\", buffer = 60000) terra::plot(terra::mask(land_mask, mask_buffer)) mask_ch <- make_mask_from_presence(lacerta_sf, method = \"convex_hull\") terra::plot(terra::mask(land_mask, mask_ch))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent.html","id":null,"dir":"Reference","previous_headings":"","what":"MaxEnt model — maxent","title":"MaxEnt model — maxent","text":"maxent defines MaxEnt model used Species Distribution Models. good guide options MaxEnt model work can found https://onlinelibrary.wiley.com/doi/full/10.1111/j.1600-0587.2013.07872.x","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"MaxEnt model — maxent","text":"","code":"maxent( mode = \"classification\", engine = \"maxnet\", feature_classes = NULL, regularization_multiplier = NULL )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"MaxEnt model — maxent","text":"mode single character string type model. possible value model \"classification\". engine single character string specifying computational engine use fitting. Currently \"maxnet\" available. 
feature_classes character, continuous feature classes desired, either \"default\" subset \"lqpht\" (example, \"lh\") regularization_multiplier numeric, constant adjust regularization","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"MaxEnt model — maxent","text":"parsnip::model_spec maxent model","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"MaxEnt model — maxent","text":"","code":"# \\donttest{ # format the data data(\"bradypus\", package = \"maxnet\") bradypus_tb <- tibble::as_tibble(bradypus) %>% dplyr::mutate(presence = relevel( factor( dplyr::case_match(presence, 1 ~ \"presence\", 0 ~ \"absence\") ), ref = \"presence\" )) %>% select(-ecoreg) # fit the model, and make some predictions maxent_spec <- maxent(feature_classes = \"lq\") maxent_fitted <- maxent_spec %>% fit(presence ~ ., data = bradypus_tb) pred_prob <- predict(maxent_fitted, new_data = bradypus[, -1], type = \"prob\") pred_class <- predict(maxent_fitted, new_data = bradypus[, -1], type = \"class\") # Now with tuning maxent_spec <- maxent( regularization_multiplier = tune(), feature_classes = tune() ) set.seed(452) cv <- vfold_cv(bradypus_tb, v = 2) maxent_tune_res <- maxent_spec %>% tune_grid(presence ~ ., cv, grid = 3) show_best(maxent_tune_res, metric = \"roc_auc\") #> # A tibble: 3 × 8 #> feature_classes regularization_multip…¹ .metric .estimator mean n std_err #> #> 1 l 1.02 roc_auc binary 0.857 2 0.0143 #> 2 lqph 1.90 roc_auc binary 0.856 2 0.0121 #> 3 lqph 2.50 roc_auc binary 0.854 2 0.0123 #> # ℹ abbreviated name: ¹​regularization_multiplier #> # ℹ 1 more variable: .config # }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent_params.html","id":null,"dir":"Reference","previous_headings":"","what":"Parameters for maxent models — maxent_params","title":"Parameters for maxent models — maxent_params","text":"parameters auxiliary MaxEnt models using \"maxnet\" engine. functions used tuning functions, user rarely access directly.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent_params.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Parameters for maxent models — maxent_params","text":"","code":"regularization_multiplier(range = c(0.5, 3), trans = NULL) feature_classes(values = c(\"l\", \"lq\", \"lqp\", \"lqph\", \"lqpht\"))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent_params.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Parameters for maxent models — maxent_params","text":"range two-element vector holding defaults smallest largest possible values, respectively. transformation specified, values transformed units. trans trans object scales package, scales::log10_trans() scales::reciprocal_trans(). not provided, default used matches units used range. no transformation, NULL.
values feature_classes(), character string subset \"lqpht\" (example, \"lh\")","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent_params.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Parameters for maxent models — maxent_params","text":"param object can used tuning.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent_params.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Parameters for maxent models — maxent_params","text":"","code":"regularization_multiplier() #> Reg. multiplier (quantitative) #> Range: [0.5, 3] feature_classes() #> Feature classes (qualitative) #> 5 possible values include: #> 'l', 'lq', 'lqp', 'lqph' and 'lqpht'"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_fit.html","id":null,"dir":"Reference","previous_headings":"","what":"Wrapper to fit maxnet models with formulae — maxnet_fit","title":"Wrapper to fit maxnet models with formulae — maxnet_fit","text":"function wrapper around maxnet::maxnet, takes formula data well exposing parameters normalisation manner compatible parsnip. Users unlikely use function directly. parsnip model specification MaxEnt, see maxent().","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_fit.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Wrapper to fit maxnet models with formulae — maxnet_fit","text":"","code":"maxnet_fit( formula, data, regmult = 1, classes = \"default\", regfun = maxnet::maxnet.default.regularization, addsamplestobackground = TRUE, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_fit.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Wrapper to fit maxnet models with formulae — maxnet_fit","text":"formula formula defining outcome predictors data data.frame outcomes predictors regmult numeric, constant adjust regularization classes character, continuous feature classes desired, either \"default\" subset \"lqpht\" (example, \"lh\") regfun function, computes regularization constant feature addsamplestobackground logical, TRUE add background presence sample not already ... currently used.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_fit.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Wrapper to fit maxnet models with formulae — maxnet_fit","text":"Maxnet returns object class maxnet, list consisting glmnet model following elements added: betas nonzero coefficients fitted model alpha constant offset making exponential model sum one background data entropy entropy exponential model penalty.factor regularization constants used feature featuremins minimum feature, used clamping featuremaxs maximum feature, used clamping varmin minimum predictor, used clamping varmax maximum predictor, used clamping samplemeans mean predictor samples (majority factors) levels levels predictor factor","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_fit.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Wrapper to fit maxnet models with formulae — maxnet_fit","text":"response needs factor class representing presences reference level factor (expected classification models).
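In code, that encoding looks like this (a hedged base-R sketch):

# presences must be the reference (first) level of the response factor
resp <- relevel(factor(c("absence", "presence", "absence")), ref = "presence")
levels(resp) # "presence" "absence": presence is the event level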
good guide options Maxent model work can found https://onlinelibrary.wiley.com/doi/full/10.1111/j.1600-0587.2013.07872.x","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_fit.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Wrapper to fit maxnet models with formulae — maxnet_fit","text":"","code":"# \\donttest{ # we repeat the example in the `maxnet` package data(\"bradypus\", package = \"maxnet\") bradypus_tb <- tibble::as_tibble(bradypus) %>% dplyr::mutate(presence = relevel( factor( dplyr::case_match(presence, 1 ~ \"presence\", 0 ~ \"absence\") ), ref = \"presence\" )) mod <- maxnet_fit(presence ~ ., data = bradypus_tb, classes = \"lq\") plot(mod, \"tmp6190_ann\") # }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_predict.html","id":null,"dir":"Reference","previous_headings":"","what":"Wrapper to predict maxnet models — maxnet_predict","title":"Wrapper to predict maxnet models — maxnet_predict","text":"function wrapper around predict method maxnet::maxnet, making function compatible parsnip. Users unlikely use function directly. parsnip model specification MaxEnt, see maxent().","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_predict.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Wrapper to predict maxnet models — maxnet_predict","text":"","code":"maxnet_predict( object, newdata, type = c(\"class\", \"prob\"), maxnet_type = c(\"cloglog\", \"link\", \"exponential\", \"logistic\"), clamp = TRUE )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_predict.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Wrapper to predict maxnet models — maxnet_predict","text":"object maxnet::maxnet object newdata dataframe new data type either \"prob\" \"class\" maxnet_type transformation used prediction clamp logical, defining whether clamping observed ranges used","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_predict.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Wrapper to predict maxnet models — maxnet_predict","text":"tibble predictions","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/niche_overlap.html","id":null,"dir":"Reference","previous_headings":"","what":"Compute overlap metrics of the two niches — niche_overlap","title":"Compute overlap metrics of the two niches — niche_overlap","text":"function computes overlap metrics two rasters. 
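A hedged sketch of maxnet_predict() above, reusing mod from the maxnet_fit() example (argument names as documented; users would normally go through predict() on a parsnip fit instead):

maxnet_predict(mod, newdata = bradypus_tb, type = "class")
maxnet_predict(mod, newdata = bradypus_tb, type = "prob", maxnet_type = "cloglog")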
currently implements Schoener's D inverse Hellinger's distance.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/niche_overlap.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compute overlap metrics of the two niches — niche_overlap","text":"","code":"niche_overlap(x, y, method = c(\"Schoener\", \"Hellinger\"))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/niche_overlap.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compute overlap metrics of the two niches — niche_overlap","text":"x terra::SpatRaster single layer y terra::SpatRaster single layer method string (vector strings) taking values \"Schoener\" \"Hellinger\"","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/niche_overlap.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compute overlap metrics of the two niches — niche_overlap","text":"list overlap metrics, slots D (depending method)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/niche_overlap.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Compute overlap metrics of the two niches — niche_overlap","text":"Note Hellinger's distance normalised dividing square root 2 (correct asymptote Hellinger's D), rather incorrect 2 used originally Warren et al (2008), based Erratum paper.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/niche_overlap.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Compute overlap metrics of the two niches — niche_overlap","text":"Warren, D.L., Glor, R.E. & Turelli M. (2008) Environmental niche equivalency versus conservativism: quantitative approaches niche evolution. Evolution 62: 2868-2883","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh.html","id":null,"dir":"Reference","previous_headings":"","what":"Find threshold that optimises a given metric — optim_thresh","title":"Find threshold that optimises a given metric — optim_thresh","text":"function returns threshold turn probabilities binary classes whilst optimising given metric. Currently available tss_max, kap_max sensitivity (target sensitivity required).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Find threshold that optimises a given metric — optim_thresh","text":"","code":"optim_thresh(truth, estimate, metric, event_level = \"first\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Find threshold that optimises a given metric — optim_thresh","text":"truth column identifier true class results (factor). unquoted column name although argument passed expression supports quasiquotation (can unquote column names). _vec() functions, factor vector. estimate predicted probability event metric character metric optimised. Currently \"tss_max\", \"kap_max\", \"sensitivity\" given target (e.g. c(\"sensitivity\",0.8)) event_level single string. Either \"first\" \"second\" specify level truth consider \"event\". argument applicable estimator = \"binary\". 
default uses internal helper generally defaults \"first\"","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Find threshold that optimises a given metric — optim_thresh","text":"probability threshold event","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Find threshold that optimises a given metric — optim_thresh","text":"","code":"optim_thresh(two_class_example$truth, two_class_example$Class1, metric = c(\"tss_max\")) #> [1] 0.7544818 optim_thresh(two_class_example$truth, two_class_example$Class1, metric = c(\"sens\", 0.9)) #> [1] 0.3710924"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_kap_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Find threshold that maximises Kappa — optim_thresh_kap_max","title":"Find threshold that maximises Kappa — optim_thresh_kap_max","text":"internal function returns threshold turn probabilities binary classes maximise kappa","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_kap_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Find threshold that maximises Kappa — optim_thresh_kap_max","text":"","code":"optim_thresh_kap_max(presences, absences)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_kap_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Find threshold that maximises Kappa — optim_thresh_kap_max","text":"presences Probabilities presences. absences Probabilities absences","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_kap_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Find threshold that maximises Kappa — optim_thresh_kap_max","text":"probability threshold event","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_sens.html","id":null,"dir":"Reference","previous_headings":"","what":"Find threshold that gives a target sensitivity — optim_thresh_sens","title":"Find threshold that gives a target sensitivity — optim_thresh_sens","text":"internal function returns threshold turn probabilities binary classes given target sensitivity","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_sens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Find threshold that gives a target sensitivity — optim_thresh_sens","text":"","code":"optim_thresh_sens(presences, absences, sens_target)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_sens.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Find threshold that gives a target sensitivity — optim_thresh_sens","text":"presences Probabilities presences.
absences Probabilities absences sens_target target sensitivity","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_sens.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Find threshold that gives a target sensitivity — optim_thresh_sens","text":"probability threshold event","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_tss_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Find threshold that maximises TSS — optim_thresh_tss_max","title":"Find threshold that maximises TSS — optim_thresh_tss_max","text":"internal function returns threshold turn probabilities binary classes maximise TSS","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_tss_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Find threshold that maximises TSS — optim_thresh_tss_max","text":"","code":"optim_thresh_tss_max(presences, absences)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_tss_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Find threshold that maximises TSS — optim_thresh_tss_max","text":"presences Probabilities presences. absences Probabilities absences","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_tss_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Find threshold that maximises TSS — optim_thresh_tss_max","text":"probability threshold event","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/out_of_range_warning.html","id":null,"dir":"Reference","previous_headings":"","what":"Warn if some times are outside the range of time steps from a raster — out_of_range_warning","title":"Warn if some times are outside the range of time steps from a raster — out_of_range_warning","text":"function helps making sure, assign times time_step layers raster, values not badly out of range","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/out_of_range_warning.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Warn if some times are outside the range of time steps from a raster — out_of_range_warning","text":"","code":"out_of_range_warning(times, time_steps)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/out_of_range_warning.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Warn if some times are outside the range of time steps from a raster — out_of_range_warning","text":"times times locations time_steps time steps raster","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/out_of_range_warning.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Warn if some times are outside the range of time steps from a raster — out_of_range_warning","text":"NULL return","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/pipe.html","id":null,"dir":"Reference","previous_headings":"","what":"Pipe operator — %>%","title":"Pipe operator — %>%","text":"See magrittr::%>% details.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/pipe.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Pipe operator — %>%","text":"","code":"lhs %>% 
rhs"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/pipe.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Pipe operator — %>%","text":"lhs value magrittr placeholder. rhs function call using magrittr semantics.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/pipe.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Pipe operator — %>%","text":"result calling rhs(lhs).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/plot_pres_vs_bg.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot presences vs background — plot_pres_vs_bg","title":"Plot presences vs background — plot_pres_vs_bg","text":"Create composite plots contrasting distribution multiple variables presences vs background.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/plot_pres_vs_bg.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot presences vs background — plot_pres_vs_bg","text":"","code":"plot_pres_vs_bg(.data, .col)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/plot_pres_vs_bg.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot presences vs background — plot_pres_vs_bg","text":".data data.frame (derived object, tibble::tibble, sf::st_sf) values bioclimate variables presences background .col column containing presences; assumes presences first level factor","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/plot_pres_vs_bg.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Plot presences vs background — plot_pres_vs_bg","text":"patchwork composite plot","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/plot_pres_vs_bg.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot presences vs background — plot_pres_vs_bg","text":"","code":"# \\donttest{ data(\"bradypus\", package = \"maxnet\") bradypus_tb <- tibble::as_tibble(bradypus) %>% dplyr::mutate(presence = relevel( factor( dplyr::case_match(presence, 1 ~ \"presence\", 0 ~ \"absence\") ), ref = \"presence\" )) %>% select(-ecoreg) bradypus_tb %>% plot_pres_vs_bg(presence) # }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.repeat_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Predict for a repeat ensemble set — predict.repeat_ensemble","title":"Predict for a repeat ensemble set — predict.repeat_ensemble","text":"Predict new dataset using repeat ensemble. Predictions individual models combined according fun","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.repeat_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Predict for a repeat ensemble set — predict.repeat_ensemble","text":"","code":"# S3 method for class 'repeat_ensemble' predict( object, new_data, type = \"prob\", fun = \"mean\", metric_thresh = NULL, class_thresh = NULL, members = FALSE, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.repeat_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Predict for a repeat ensemble set — predict.repeat_ensemble","text":"object repeat_ensemble object new_data data frame look variables predict. type type prediction, \"prob\" \"class\". 
fun string defining aggregating function. can take values mean, median, weighted_mean, weighted_median none. possible combine multiple functions, except \"none\". set \"none\", individual member predictions returned (automatically sets member TRUE) metric_thresh vector length 2 giving metric threshold, used prune models ensemble used prediction. 'metrics' need computed workflow tuned. Examples c(\"accuracy\",0.8) c(\"boyce_cont\",0.7) class_thresh probability threshold used convert probabilities classes. can number (0 1), character metric (currently \"tss_max\" \"sensitivity\"). sensitivity, additional target value passed along second element vector, e.g. c(\"sensitivity\",0.8). members boolean defining whether individual predictions member added ensemble prediction. columns individual members name workflow prefix, separated \".\" usual column names predictions. ... used method.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.repeat_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Predict for a repeat ensemble set — predict.repeat_ensemble","text":"tibble predictions","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.simple_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Predict for a simple ensemble set — predict.simple_ensemble","title":"Predict for a simple ensemble set — predict.simple_ensemble","text":"Predict new dataset using simple ensemble. Predictions individual models (.e. workflows) combined according fun","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.simple_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Predict for a simple ensemble set — predict.simple_ensemble","text":"","code":"# S3 method for class 'simple_ensemble' predict( object, new_data, type = \"prob\", fun = \"mean\", metric_thresh = NULL, class_thresh = NULL, members = FALSE, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.simple_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Predict for a simple ensemble set — predict.simple_ensemble","text":"object simple_ensemble object new_data data frame look variables predict. type type prediction, \"prob\" \"class\". fun string defining aggregating function. can take values mean, median, weighted_mean, weighted_median none. possible combine multiple functions, except \"none\". set \"none\", individual member predictions returned (automatically sets member TRUE) metric_thresh vector length 2 giving metric threshold, used prune models ensemble used prediction. 'metrics' need computed workflow tuned. Examples c(\"accuracy\",0.8) c(\"boyce_cont\",0.7) class_thresh probability threshold used convert probabilities classes. can number (0 1), character metric (currently \"tss_max\" \"sensitivity\"). sensitivity, additional target value passed along second element vector, e.g. c(\"sensitivity\",0.8). members boolean defining whether individual predictions member added ensemble prediction. columns individual members name workflow prefix, separated \".\" usual column names predictions. ... 
used method.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.simple_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Predict for a simple ensemble set — predict.simple_ensemble","text":"tibble predictions","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict_raster.html","id":null,"dir":"Reference","previous_headings":"","what":"Make predictions for a whole raster — predict_raster","title":"Make predictions for a whole raster — predict_raster","text":"function allows use raster data make predictions variety tidymodels objects, simple_ensemble stacks::stacks","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict_raster.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Make predictions for a whole raster — predict_raster","text":"","code":"predict_raster(object, raster, ...) # Default S3 method predict_raster(object, raster, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict_raster.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Make predictions for a whole raster — predict_raster","text":"object tidymodels object interest raster terra::SpatRaster input data. include levels names variables used object ... parameters passed standard predict() function appropriate object type (e.g. metric_thresh class_thresh).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict_raster.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Make predictions for a whole raster — predict_raster","text":"terra::SpatRaster predictions","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_metrics_sf.html","id":null,"dir":"Reference","previous_headings":"","what":"Probability metrics for sf objects — prob_metrics_sf","title":"Probability metrics for sf objects — prob_metrics_sf","text":"tidysdm provides specialised metrics SDMs, help pages(boyce_cont(), kap_max(), tss_max()). Additionally, also provides methods handle sf::sf objects following standard yardstick metrics: yardstick::average_precision() yardstick::brier_class() yardstick::classification_cost() yardstick::gain_capture() yardstick::mn_log_loss() yardstick::pr_auc() yardstick::roc_auc() yardstick::roc_aunp() yardstick::roc_aunu()","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_metrics_sf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Probability metrics for sf objects — prob_metrics_sf","text":"","code":"# S3 method for class 'sf' average_precision(data, ...) # S3 method for class 'sf' brier_class(data, ...) # S3 method for class 'sf' classification_cost(data, ...) # S3 method for class 'sf' gain_capture(data, ...) # S3 method for class 'sf' mn_log_loss(data, ...) # S3 method for class 'sf' pr_auc(data, ...) # S3 method for class 'sf' roc_auc(data, ...) # S3 method for class 'sf' roc_aunp(data, ...) # S3 method for class 'sf' roc_aunu(data, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_metrics_sf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Probability metrics for sf objects — prob_metrics_sf","text":"data sf::sf object ... parameters pass data.frame version metric. 
See specific man page metric interest.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_metrics_sf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Probability metrics for sf objects — prob_metrics_sf","text":"tibble columns .metric, .estimator, .estimate 1 row values.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_metrics_sf.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Probability metrics for sf objects — prob_metrics_sf","text":"Note roc_aunp roc_aunu multiclass metrics, not relevant SDMs (work binary response). included completeness, class probability metrics yardstick sf method, applications SDMs.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_to_binary.html","id":null,"dir":"Reference","previous_headings":"","what":"simple function to convert probability to binary classes — prob_to_binary","title":"simple function to convert probability to binary classes — prob_to_binary","text":"simple function convert probability binary classes","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_to_binary.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"simple function to convert probability to binary classes — prob_to_binary","text":"","code":"prob_to_binary(x, thresh, class_levels)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_to_binary.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"simple function to convert probability to binary classes — prob_to_binary","text":"x vector probabilities thresh threshold convert binary class_levels binary levels","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_to_binary.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"simple function to convert probability to binary classes — prob_to_binary","text":"vector binary values","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/recipe.sf.html","id":null,"dir":"Reference","previous_headings":"","what":"Recipe for sf objects — recipe.sf","title":"Recipe for sf objects — recipe.sf","text":"method recipes::recipe() handles case x sf::sf object, commonly used Species Distribution Model, generates spatial_recipe.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/recipe.sf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Recipe for sf objects — recipe.sf","text":"","code":"# S3 method for class 'sf' recipe(x, ...) spatial_recipe(x, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/recipe.sf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Recipe for sf objects — recipe.sf","text":"x sf::sf data frame. ... parameters passed recipes::recipe()","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/recipe.sf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Recipe for sf objects — recipe.sf","text":"object class spatial_recipe, derived version recipes::recipe(), see manpage recipes::recipe() details.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/recipe.sf.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Recipe for sf objects — recipe.sf","text":"recipes::recipe() not natively compatible sf::sf objects.
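A minimal hedged sketch of the calling convention this method expects (lacerta_sf as built in the make_mask_from_presence() example above; the class column is hypothetical, for illustration only):

lacerta_sf$class <- "presence" # hypothetical outcome column
rec <- recipe(x = lacerta_sf, formula = class ~ .) # sf object as x, then the formula
class(rec) # should include "spatial_recipe"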
problem geometry column sf::sf objects list, incompatible translation formulae recipes::recipe(). method strips geometry column data.frame replaces simple X Y columns operations, thus allowing usual processing recipes::recipe() succeed (X Y give role coords spatial recipe). prepping baking spatial_recipe, data.frame tibble without coordinates used training new_data, dummy X Y columns generated filled NAs. NOTE order matters! need use syntax recipe(x=sf_obj, formula=class~.) method successfully detect sf::sf object. Starting formula fail.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/repeat_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Repeat ensemble — repeat_ensemble","title":"Repeat ensemble — repeat_ensemble","text":"ensemble based multiple sets pseudoabsences/background. object collection (list) simple_ensemble objects predictions combined simple way (e.g. taking either mean median). simple_ensemble contains best version given model type following tuning; simple ensembles need metric estimated cv process.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/repeat_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Repeat ensemble — repeat_ensemble","text":"","code":"repeat_ensemble(...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/repeat_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Repeat ensemble — repeat_ensemble","text":"... used, function just creates empty repeat_ensemble object. Members added add_repeat()","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/repeat_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Repeat ensemble — repeat_ensemble","text":"empty repeat_ensemble","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample background points for SDM analysis — sample_background","title":"Sample background points for SDM analysis — sample_background","text":"function samples background points raster given set presences. locations returned center points sampled cells, can overlap presences (contrast pseudo-absences, see sample_pseudoabs). following methods implemented: 'random': background randomly sampled region covered raster (i.e. not NAs). 'dist_max': background randomly sampled unioned buffers 'dist_max' presences (distances 'm' lonlat rasters, map units projected rasters). Using union buffers means areas multiple buffers not oversampled. also referred \"thickening\". 'bias': background points sampled according surface representing biased sampling effort.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample background points for SDM analysis — sample_background","text":"","code":"sample_background( data, raster, n, coords = NULL, method = \"random\", class_label = \"background\", return_pres = TRUE )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample background points for SDM analysis — sample_background","text":"data sf::sf data frame, data frame coordinate variables. can defined coords, unless standard names (see details).
raster terra::SpatRaster cells sampled (first layer used determine cells NAs, thus cannot be sampled). sampling \"biased\", sampling probability proportional values first layer (.e. band) raster. n number background points sample. coords vector length two giving names \"x\" \"y\" coordinates, found data. left NULL, function try guess columns based standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\"), c(\"lon\", \"lat\"). method sampling method. One 'random', 'dist_max', 'bias'. dist_max, maximum distance set additional element vector, e.g. c('dist_max',70000). class_label label given sampled points. Defaults background return_pres return presences together background single tibble.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample background points for SDM analysis — sample_background","text":"object class tibble::tibble. presences returned, presence level set reference (match expectations yardstick package considers first level event).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Sample background points for SDM analysis — sample_background","text":"Note units distance depend projection raster.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background_time.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample background points for SDM analysis for points with a time point. — sample_background_time","title":"Sample background points for SDM analysis for points with a time point. — sample_background_time","text":"function samples background points raster given set presences. locations returned center points sampled cells, can overlap presences (contrast pseudo-absences, see sample_pseudoabs_time). following methods implemented: 'random': background points randomly sampled region covered raster (.e. not NAs). 'dist_max': background points randomly sampled unioned buffers 'dist_max' presences (distances 'm' lonlat rasters, map units projected rasters). Using union buffers means areas multiple buffers oversampled. also referred \"thickening\". 'bias': background points sampled according surface representing biased sampling effort. Note surface time step normalised sum 1; use n_per_time_step affect sampling effort within time step.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background_time.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample background points for SDM analysis for points with a time point. — sample_background_time","text":"","code":"sample_background_time( data, raster, n_per_time_step, coords = NULL, time_col = \"time\", lubridate_fun = c, method = \"random\", class_label = \"background\", return_pres = TRUE, time_buffer = 0 )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background_time.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample background points for SDM analysis for points with a time point. — sample_background_time","text":"data sf::sf data frame, data frame coordinate variables. can defined coords, unless standard names (see details ). raster terra::SpatRaster terra::SpatRasterDataset cells sampled. terra::SpatRasterDataset, first dataset used define cells valid, NAs. 
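A hedged sketch of sample_background() as documented above, assuming hypothetical objects presences (an sf data frame) and climate (a SpatRaster); for a lonlat raster the buffer distance is in metres.

```r
library(tidysdm)
# sample 1000 background points within 70 km of any presence ("thickening")
bg <- sample_background(
  data = presences, raster = climate, n = 1000,
  method = c("dist_max", 70000)
)
```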
n_per_time_step number background points sample time step (.e. vector length equal number time steps raster) coords vector length two giving names \"x\" \"y\" coordinates, found data. left NULL, function try guess columns based standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\"), c(\"lon\", \"lat\") time_col name column time; time lubridate object, use lubridate_fun provide function can used convert appropriately lubridate_fun function convert time column lubridate object method sampling method. One 'random', 'dist_max', 'bias'. class_label label given sampled points. Defaults background return_pres return presences together background single tibble time_buffer buffer time axis around presences defines effect sampling background method 'dist_max'. set zero, presences effect time step assigned raster; positive value, defines number days date provided time column presence considered (e.g. 20 days means presence considered time steps equivalent plus minus twenty days date).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background_time.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample background points for SDM analysis for points with a time point. — sample_background_time","text":"object class tibble::tibble. presences returned, presence level set reference (match expectations yardstick package considers first level event)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample pseudo-absence points for SDM analysis — sample_pseudoabs","title":"Sample pseudo-absence points for SDM analysis — sample_pseudoabs","text":"function samples pseudo-absence points raster given set presences. locations returned center points sampled cells, cannot overlap presences (contrast background points, see sample_background). following methods implemented: 'random': pseudo-absences randomly sampled region covered raster (.e. not NAs). 'dist_min': pseudo-absences randomly sampled region excluding buffer 'dist_min' presences (distances 'm' lonlat rasters, map units projected rasters). 'dist_max': pseudo-absences randomly sampled unioned buffers 'dist_max' presences (distances 'm' lonlat rasters, map units projected rasters). Using union buffers means areas multiple buffers oversampled. also referred \"thickening\". 'dist_disc': pseudo-absences randomly sampled unioned discs around presences two values 'dist_disc' defining minimum maximum distance presences.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample pseudo-absence points for SDM analysis — sample_pseudoabs","text":"","code":"sample_pseudoabs( data, raster, n, coords = NULL, method = \"random\", class_label = \"pseudoabs\", return_pres = TRUE )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample pseudo-absence points for SDM analysis — sample_pseudoabs","text":"data sf::sf data frame, data frame coordinate variables. can defined coords, unless standard names (see details ). raster terra::SpatRaster cells sampled n number pseudoabsence points sample coords vector length two giving names \"x\" \"y\" coordinates, found data. 
left NULL, function try guess columns based standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\"), c(\"lon\", \"lat\") method sampling method. One 'random', 'dist_min', 'dist_max', 'dist_disc'. Threshold distances set additional elements vector, e.g. c('dist_min',70000) c('dist_disc',50000,200000). class_label label given sampled points. Defaults pseudoabs return_pres return presences together pseudoabsences single tibble","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample pseudo-absence points for SDM analysis — sample_pseudoabs","text":"object class tibble::tibble. presences returned, presence level set reference (match expectations yardstick package considers first level event)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs_time.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample pseudo-absence points for SDM analysis for points with a time point. — sample_pseudoabs_time","title":"Sample pseudo-absence points for SDM analysis for points with a time point. — sample_pseudoabs_time","text":"function samples pseudo-absence points raster given set presences. locations returned center points sampled cells, cannot overlap presences (contrast background points, see sample_background_time). following methods implemented: 'random': pseudo-absences randomly sampled region covered raster (.e. not NAs). 'dist_min': pseudo-absences randomly sampled region excluding buffer 'dist_min' presences (distances 'm' lonlat rasters, map units projected rasters). 'dist_max': pseudo-absences randomly sampled unioned buffers 'dist_max' presences (distances 'm' lonlat rasters, map units projected rasters). Using union buffers means areas multiple buffers oversampled. also referred \"thickening\". 'dist_disc': pseudo-absences randomly sampled unioned discs around presences two values 'dist_disc' defining minimum maximum distance presences.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs_time.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample pseudo-absence points for SDM analysis for points with a time point. — sample_pseudoabs_time","text":"","code":"sample_pseudoabs_time( data, raster, n_per_presence, coords = NULL, time_col = \"time\", lubridate_fun = c, method = \"random\", class_label = \"pseudoabs\", return_pres = TRUE, time_buffer = 0 )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs_time.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample pseudo-absence points for SDM analysis for points with a time point. — sample_pseudoabs_time","text":"data sf::sf data frame, data frame coordinate variables. can defined coords, unless standard names (see details ). raster terra::SpatRaster terra::SpatRasterDataset cells sampled. terra::SpatRasterDataset, first dataset used define cells valid, NAs. n_per_presence number pseudoabsence points sample presence coords vector length two giving names \"x\" \"y\" coordinates, found data. 
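For sample_pseudoabs() above, a sketch of the disc method with the same hypothetical presences and climate objects; the annulus keeps pseudo-absences between 50 and 200 km from any presence.

```r
library(tidysdm)
# three pseudo-absences per presence, sampled in a 50-200 km annulus
pa <- sample_pseudoabs(
  data = presences, raster = climate, n = 3 * nrow(presences),
  method = c("dist_disc", 50000, 200000)
)
```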
left NULL, function try guess columns based standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\"), c(\"lon\", \"lat\") time_col name column time; time lubridate object, use lubridate_fun provide function can used convert appropriately lubridate_fun function convert time column lubridate object method sampling method. One 'random', 'dist_min', 'dist_max', 'dist_disc'. class_label label given sampled points. Defaults pseudoabs return_pres return presences together pseudoabsences single tibble time_buffer buffer time axis around presences defines effect sampling pseudoabsences. set zero, presences effect time step assigned raster; positive value, defines number days date provided time column presence considered (e.g. 20 days means presence considered time steps equivalent plus minus twenty days date).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs_time.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample pseudo-absence points for SDM analysis for points with a time point. — sample_pseudoabs_time","text":"object class tibble::tibble. presences returned, presence level set reference (match expectations yardstick package considers first level event)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_metric_set.html","id":null,"dir":"Reference","previous_headings":"","what":"Metric set for SDM — sdm_metric_set","title":"Metric set for SDM — sdm_metric_set","text":"function returns yardstick::metric_set includes boyce_cont(), yardstick::roc_auc() tss_max(), commonly used metrics SDM.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_metric_set.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Metric set for SDM — sdm_metric_set","text":"","code":"sdm_metric_set(...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_metric_set.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Metric set for SDM — sdm_metric_set","text":"... additional metrics added yardstick::metric_set. 
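A time-aware sketch for sample_pseudoabs_time(), assuming hypothetical fossil_presences with a time_bp column (years before present) and a climate_series SpatRasterDataset; pastclim::ybp2date is one possible converter to lubridate dates.

```r
library(tidysdm)
# one pseudo-absence per presence, matched to the raster time steps;
# pseudo-absences are kept at least 50 km away from any presence
pa <- sample_pseudoabs_time(
  data = fossil_presences, raster = climate_series,
  n_per_presence = 1, time_col = "time_bp",
  lubridate_fun = pastclim::ybp2date,
  method = c("dist_min", 50000)
)
```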
See help yardstick::metric_set() constraints type metrics can mixed.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_metric_set.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Metric set for SDM — sdm_metric_set","text":"yardstick::metric_set object.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_metric_set.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Metric set for SDM — sdm_metric_set","text":"","code":"sdm_metric_set() #> A metric set, consisting of: #> - `boyce_cont()`, a probability metric | direction: maximize #> - `roc_auc()`, a probability metric | direction: maximize #> - `tss_max()`, a probability metric | direction: maximize sdm_metric_set(accuracy) #> A metric set, consisting of: #> - `boyce_cont()`, a probability metric | direction: maximize #> - `roc_auc()`, a probability metric | direction: maximize #> - `tss_max()`, a probability metric | direction: maximize #> - `accuracy()`, a class metric | direction: maximize"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_boost_tree.html","id":null,"dir":"Reference","previous_headings":"","what":"Model specification for a Boosted Trees model for SDM — sdm_spec_boost_tree","title":"Model specification for a Boosted Trees model for SDM — sdm_spec_boost_tree","text":"function returns parsnip::model_spec Boosted Trees model used classifier presences absences Species Distribution Model. uses library xgboost fit boosted trees; use another library, simply build parsnip::model_spec directly.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_boost_tree.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model specification for a Boosted Trees model for SDM — sdm_spec_boost_tree","text":"","code":"sdm_spec_boost_tree(..., tune = c(\"sdm\", \"all\", \"custom\", \"none\"))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_boost_tree.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model specification for a Boosted Trees model for SDM — sdm_spec_boost_tree","text":"... parameters passed parsnip::boost_tree() customise model. See help function details. tune character defining tuning strategy. Valid strategies : \"sdm\" chooses hyperparameters important tune sdm (boost_tree: 'mtry', 'trees', 'tree_depth', 'learn_rate', 'loss_reduction', 'stop_iter') \"all\" tunes hyperparameters (boost_tree: 'mtry', 'trees', 'tree_depth', 'learn_rate', 'loss_reduction', 'stop_iter','min_n' 'sample_size') \"custom\" passes options '...' 
\"none\" tune hyperparameter","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_boost_tree.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model specification for a Boosted Trees model for SDM — sdm_spec_boost_tree","text":"parsnip::model_spec model.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_boost_tree.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model specification for a Boosted Trees model for SDM — sdm_spec_boost_tree","text":"","code":"standard_bt_spec <- sdm_spec_boost_tree() full_bt_spec <- sdm_spec_boost_tree(tune = \"all\") custom_bt_spec <- sdm_spec_boost_tree(tune = \"custom\", mtry = tune())"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":null,"dir":"Reference","previous_headings":"","what":"Model specification for a GAM for SDM — sdm_spec_gam","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"function returns parsnip::model_spec General Additive Model used classifier presences absences Species Distribution Model.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"","code":"sdm_spec_gam(..., tune = \"none\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"... parameters passed parsnip::gen_additive_mod() customise model. See help function details. tune character defining tuning strategy. hyperparameters tune gam, valid option \"none\". 
parameter present consistency sdm_spec_* functions, nothing case.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"parsnip::model_spec model.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"Note , using GAMs workflow_set(), necessary update model gam_formula() (see parsnip::model_formula discussion formulas special terms tidymodels):","code":"workflow_set( preproc = list(default = my_recipe), models = list(gam = sdm_spec_gam()), cross = TRUE ) %>% update_workflow_model(\"default_gam\", spec = sdm_spec_gam(), formula = gam_formula(my_recipe))"},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"","code":"my_gam_spec <- sdm_spec_gam()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_glm.html","id":null,"dir":"Reference","previous_headings":"","what":"Model specification for a GLM for SDM — sdm_spec_glm","title":"Model specification for a GLM for SDM — sdm_spec_glm","text":"function returns parsnip::model_spec Generalised Linear Model used classifier presences absences Species Distribution Model.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_glm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model specification for a GLM for SDM — sdm_spec_glm","text":"","code":"sdm_spec_glm(..., tune = \"none\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_glm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model specification for a GLM for SDM — sdm_spec_glm","text":"... parameters passed parsnip::logistic_reg() customise model. See help function details. tune character defining tuning strategy. no hyperparameters tune glm, valid option \"none\". 
parameter present consistency sdm_spec_* functions, nothing case.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_glm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model specification for a GLM for SDM — sdm_spec_glm","text":"parsnip::model_spec model.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_glm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model specification for a GLM for SDM — sdm_spec_glm","text":"","code":"my_spec_glm <- sdm_spec_glm()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_maxent.html","id":null,"dir":"Reference","previous_headings":"","what":"Model specification for a MaxEnt for SDM — sdm_spec_maxent","title":"Model specification for a MaxEnt for SDM — sdm_spec_maxent","text":"function returns parsnip::model_spec MaxEnt model used Species Distribution Models.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_maxent.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model specification for a MaxEnt for SDM — sdm_spec_maxent","text":"","code":"sdm_spec_maxent(..., tune = c(\"sdm\", \"all\", \"custom\", \"none\"))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_maxent.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model specification for a MaxEnt for SDM — sdm_spec_maxent","text":"... parameters passed maxent() customise model. See help function details. tune character defining tuning strategy. Valid strategies : \"sdm\" chooses hyper-parameters important tune sdm (maxent: 'feature_classes', 'regularization_multiplier') \"all\" tunes hyperparameters (maxent: 'feature_classes', 'regularization_multiplier') \"custom\" passes options '...' \"none\" tunes no hyperparameter","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_maxent.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model specification for a MaxEnt for SDM — sdm_spec_maxent","text":"parsnip::model_spec model.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_maxent.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model specification for a MaxEnt for SDM — sdm_spec_maxent","text":"","code":"test_maxent_spec <- sdm_spec_maxent(tune = \"sdm\") test_maxent_spec #> maxent Model Specification (classification) #> #> Main Arguments: #> feature_classes = tune() #> regularization_multiplier = tune() #> #> Computational engine: maxnet #> # setting specific values sdm_spec_maxent(tune = \"custom\", feature_classes = \"lq\") #> maxent Model Specification (classification) #> #> Main Arguments: #> feature_classes = lq #> #> Computational engine: maxnet #>"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_rand_forest.html","id":null,"dir":"Reference","previous_headings":"","what":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","title":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","text":"function returns parsnip::model_spec Random Forest used classifier presences absences Species Distribution Models. 
uses library ranger fit random forests; use another library, simply build parsnip::model_spec directly.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_rand_forest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","text":"","code":"sdm_spec_rand_forest(..., tune = c(\"sdm\", \"all\", \"custom\", \"none\")) sdm_spec_rf(..., tune = c(\"sdm\", \"all\", \"custom\", \"none\"))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_rand_forest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","text":"... parameters passed parsnip::rand_forest() customise model. See help function details. tune character defining tuning strategy. Valid strategies : \"sdm\" chooses hyperparameters important tune sdm (rf, 'mtry') \"all\" tunes hyperparameters (rf: 'mtry', 'trees', 'min_n') \"custom\" passes options '...' \"none\" tunes no hyperparameter","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_rand_forest.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","text":"parsnip::model_spec model.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_rand_forest.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","text":"sdm_spec_rf() simply short form sdm_spec_rand_forest().","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_rand_forest.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","text":"","code":"test_rf_spec <- sdm_spec_rf(tune = \"sdm\") test_rf_spec #> Random Forest Model Specification (classification) #> #> Main Arguments: #> mtry = tune() #> #> Computational engine: ranger #> # combining tuning with specific values for other hyperparameters sdm_spec_rf(tune = \"sdm\", trees = 100) #> Random Forest Model Specification (classification) #> #> Main Arguments: #> mtry = tune() #> trees = 100 #> #> Computational engine: ranger #>"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/simple_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Simple ensemble — simple_ensemble","title":"Simple ensemble — simple_ensemble","text":"simple ensemble collection workflows predictions combined simple way (e.g. taking either mean median). Usually workflows consists best version given model algorithm following tuning. workflows fitted full training dataset making predictions.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/simple_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simple ensemble — simple_ensemble","text":"","code":"simple_ensemble(...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/simple_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simple ensemble — simple_ensemble","text":"... used, function just creates empty simple_ensemble object. 
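A sketch of filling a simple_ensemble, assuming a hypothetical tuned workflow_set object my_models; add_member() is the helper used in the package vignettes to pull in the best version of each workflow.

```r
library(tidysdm)
# keep the best configuration of each workflow, ranked by Boyce continuous index
ens <- simple_ensemble() %>%
  add_member(my_models, metric = "boyce_cont")
autoplot(ens)  # compare the members' metrics
```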
Members added add_best_candidates()","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/simple_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simple ensemble — simple_ensemble","text":"empty simple_ensemble. tibble columns: wflow_id: name workflows best model chosen workflow: trained workflow objects metrics: metrics based crossvalidation resampling used tune models","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/spatial_initial_split.html","id":null,"dir":"Reference","previous_headings":"","what":"Simple Training/Test Set Splitting for spatial data — spatial_initial_split","title":"Simple Training/Test Set Splitting for spatial data — spatial_initial_split","text":"spatial_initial_split creates single binary split data training set testing set. strategies package spatialsample available; random split strategy used generate initial split.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/spatial_initial_split.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simple Training/Test Set Splitting for spatial data — spatial_initial_split","text":"","code":"spatial_initial_split(data, prop, strategy, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/spatial_initial_split.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simple Training/Test Set Splitting for spatial data — spatial_initial_split","text":"data dataset (data.frame tibble) prop proportion data retained modelling/analysis. strategy sampling strategy spatialsample ... parameters passed strategy","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/spatial_initial_split.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simple Training/Test Set Splitting for spatial data — spatial_initial_split","text":"rsplit object can used rsample::training rsample::testing functions extract data split.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/spatial_initial_split.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Simple Training/Test Set Splitting for spatial data — spatial_initial_split","text":"","code":"set.seed(123) block_initial <- spatial_initial_split(boston_canopy, prop = 1 / 5, spatial_block_cv) testing(block_initial) #> Simple feature collection with 153 features and 18 fields #> Geometry type: MULTIPOLYGON #> Dimension: XY #> Bounding box: xmin: 745098 ymin: 2915630 xmax: 805045.8 ymax: 2969840 #> Projected CRS: NAD83 / Massachusetts Mainland (ftUS) #> # A tibble: 153 × 19 #> grid_id land_area canopy_gain canopy_loss canopy_no_change canopy_area_2014 #> #> 1 M-9 2690727. 52443. 53467. 304239. 357706. #> 2 Q-21 2690727. 54712. 101816. 1359305. 1461121. #> 3 AB-23 725043. 13737. 13278. 52628. 65906. #> 4 AC-15 1175032. 24517. 24010. 111148. 135158. #> 5 U-25 2691491. 83740. 117496. 601040. 718536. #> 6 Y-13 2691490. 79215. 41676. 312299. 353975. #> 7 M-10 2578879. 27026. 41240. 161115. 202355. #> 8 T-22 2691490. 80929. 140490. 573628. 714118. #> 9 AO-16 1717547. 64863. 52390. 465563. 517953. #> 10 X-23 2690728. 85198. 109044. 458205. 567249. 
#> # ℹ 143 more rows #> # ℹ 13 more variables: canopy_area_2019 , change_canopy_area , #> # change_canopy_percentage , canopy_percentage_2014 , #> # canopy_percentage_2019 , change_canopy_absolute , #> # mean_temp_morning , mean_temp_evening , mean_temp , #> # mean_heat_index_morning , mean_heat_index_evening , #> # mean_heat_index , geometry training(block_initial) #> Simple feature collection with 529 features and 18 fields #> Geometry type: MULTIPOLYGON #> Dimension: XY #> Bounding box: xmin: 739826.9 ymin: 2908294 xmax: 812069.7 ymax: 2970073 #> Projected CRS: NAD83 / Massachusetts Mainland (ftUS) #> # A tibble: 529 × 19 #> grid_id land_area canopy_gain canopy_loss canopy_no_change canopy_area_2014 #> #> 1 AB-4 795045. 15323. 3126. 53676. 56802. #> 2 I-33 265813. 8849. 11795. 78677. 90472. #> 3 AO-9 270153 6187. 1184. 26930. 28114. #> 4 H-10 2691490. 73098. 80362. 345823. 426185. #> 5 V-7 107890. 219. 3612. 240. 3852. #> 6 Q-22 2648089. 122211. 154236. 1026632. 1180868. #> 7 X-4 848558. 8275. 1760. 6872. 8632. #> 8 P-18 2690726. 110928. 113146. 915137. 1028283. #> 9 J-29 2574479. 38069. 15530. 2388638. 2404168. #> 10 G-28 2641525. 87024. 39246. 1202528. 1241774. #> # ℹ 519 more rows #> # ℹ 13 more variables: canopy_area_2019 , change_canopy_area , #> # change_canopy_percentage , canopy_percentage_2014 , #> # canopy_percentage_2019 , change_canopy_absolute , #> # mean_temp_morning , mean_temp_evening , mean_temp , #> # mean_heat_index_morning , mean_heat_index_evening , #> # mean_heat_index , geometry "},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell.html","id":null,"dir":"Reference","previous_headings":"","what":"Thin point dataset to have 1 observation per raster cell — thin_by_cell","title":"Thin point dataset to have 1 observation per raster cell — thin_by_cell","text":"function thins dataset one observation per cell retained.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Thin point dataset to have 1 observation per raster cell — thin_by_cell","text":"","code":"thin_by_cell(data, raster, coords = NULL, drop_na = TRUE, agg_fact = NULL)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Thin point dataset to have 1 observation per raster cell — thin_by_cell","text":"data sf::sf data frame, data frame coordinate variables. can defined coords, unless standard names (see details ). raster terra::SpatRaster object defined grid coords vector length two giving names \"x\" \"y\" coordinates, found data. left NULL, function try guess columns based standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\"), c(\"lon\", \"lat\") drop_na boolean whether locations NA raster dropped. agg_fact positive integer. Aggregation factor expressed number cells direction (horizontally vertically). two integers (horizontal vertical aggregation factor) three integers (also aggregating layers). Defaults NULL, implies aggregation (.e. 
thinning done grid raster)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Thin point dataset to have 1 observation per raster cell — thin_by_cell","text":"object class sf::sf data.frame, \"data\".","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Thin point dataset to have 1 observation per raster cell — thin_by_cell","text":"thinning can achieved aggregating cells raster thinning, achieved setting agg_fact > 1 (aggregation works manner equivalent terra::aggregate()).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell_time.html","id":null,"dir":"Reference","previous_headings":"","what":"Thin point dataset to have 1 observation per raster cell per time slice — thin_by_cell_time","title":"Thin point dataset to have 1 observation per raster cell per time slice — thin_by_cell_time","text":"function thins dataset one observation per cell per time slice retained. use raster layers time slices define data cube thinning enforced (see details time formatted).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell_time.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Thin point dataset to have 1 observation per raster cell per time slice — thin_by_cell_time","text":"","code":"thin_by_cell_time( data, raster, coords = NULL, time_col = \"time\", lubridate_fun = c, drop_na = TRUE, agg_fact = NULL )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell_time.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Thin point dataset to have 1 observation per raster cell per time slice — thin_by_cell_time","text":"data sf::sf data frame, data frame coordinate variables. can defined coords, unless standard names (see details ). raster terra::SpatRaster object defined grid layers corresponding time slices (times set either POSIXlt \"years\", see terra::time() details), terra::SpatRasterDataset first dataset used (, times dataset set either POSIXlt \"years\") terra::time() coords vector length two giving names \"x\" \"y\" coordinates, found data. left NULL, function try guess columns based standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\"), c(\"lon\", \"lat\") time_col name column time; time lubridate object, use lubridate_fun provide function can used convert appropriately lubridate_fun function convert time column lubridate object drop_na boolean whether locations NA raster dropped. agg_fact positive integer. Aggregation factor expressed number cells direction (horizontally vertically). two integers (horizontal vertical aggregation factor) three integers (also aggregating layers). Defaults NULL, implies no aggregation (.e. 
thinning done grid raster)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell_time.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Thin point dataset to have 1 observation per raster cell per time slice — thin_by_cell_time","text":"object class sf::sf data.frame, \"data\".","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell_time.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Thin point dataset to have 1 observation per raster cell per time slice — thin_by_cell_time","text":"spatial thinning can achieved aggregating cells raster thinning, achieved setting agg_fact > 1 (aggregation works manner equivalent terra::aggregate()).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist.html","id":null,"dir":"Reference","previous_headings":"","what":"Thin points dataset based on geographic distance — thin_by_dist","title":"Thin points dataset based on geographic distance — thin_by_dist","text":"function thins dataset observations distance greater \"dist_min\" retained.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Thin points dataset based on geographic distance — thin_by_dist","text":"","code":"thin_by_dist(data, dist_min, coords = NULL)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Thin points dataset based on geographic distance — thin_by_dist","text":"data sf::sf data frame, data frame coordinate variables. can defined coords, unless standard names (see details ). dist_min Minimum distance points (units appropriate projection, meters lonlat data). coords vector length two giving names \"x\" \"y\" coordinates, found data. left NULL, function try guess columns based standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\"), c(\"lon\", \"lat\")","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Thin points dataset based on geographic distance — thin_by_dist","text":"object class sf::sf data.frame, \"data\".","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Thin points dataset based on geographic distance — thin_by_dist","text":"Distances measured appropriate units projection used. case raw latitude longitude (e.g. provided data.frame), crs set WGS84, units set meters. 
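A combined sketch of the thinning helpers documented above, again with hypothetical presences and climate objects; for lonlat data the distances are in metres.

```r
library(tidysdm)
# keep at most one observation per raster cell...
thinned <- thin_by_cell(presences, raster = climate)
# ...then enforce a minimum spacing of 20 km between points
thinned <- thin_by_dist(thinned, dist_min = 20000)
```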
function modified version algorithm spThin, adapted work sf objects.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist_time.html","id":null,"dir":"Reference","previous_headings":"","what":"Thin points dataset based on geographic and temporal distance — thin_by_dist_time","title":"Thin points dataset based on geographic and temporal distance — thin_by_dist_time","text":"function thins dataset observations distance greater \"dist_min\" space \"interval_min\" time retained.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist_time.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Thin points dataset based on geographic and temporal distance — thin_by_dist_time","text":"","code":"thin_by_dist_time( data, dist_min, interval_min, coords = NULL, time_col = \"time\", lubridate_fun = c )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist_time.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Thin points dataset based on geographic and temporal distance — thin_by_dist_time","text":"data sf::sf data frame, data frame coordinate variables. can defined coords, unless standard names (see details ). dist_min Minimum distance points (units appropriate projection, meters lonlat data). interval_min Minimum time interval points, days. coords vector length two giving names \"x\" \"y\" coordinates, found data. left NULL, function try guess columns based standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\"), c(\"lon\", \"lat\") time_col name column time; time lubridate object, use lubridate_fun provide function can used convert appropriately lubridate_fun function convert time column lubridate object","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist_time.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Thin points dataset based on geographic and temporal distance — thin_by_dist_time","text":"object class sf::sf data.frame, \"data\".","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist_time.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Thin points dataset based on geographic and temporal distance — thin_by_dist_time","text":"Geographic distances measured appropriate units projection used. case raw latitude longitude (e.g. provided data.frame), crs set WGS84, units set meters. Time interval estimated days. Note long time period, simple conversion x years = 365 * x days might lead slightly shorter intervals expected, ignores leap years. function y2d() provides closer approximation. function algorithm analogous spThin, exception neighbours defined terms space time.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tidysdm-package.html","id":null,"dir":"Reference","previous_headings":"","what":"tidysdm: Species Distribution Models with Tidymodels — tidysdm-package","title":"tidysdm: Species Distribution Models with Tidymodels — tidysdm-package","text":"Fit species distribution models (SDMs) using 'tidymodels' framework, provides standardised interface define models process outputs. 'tidysdm' expands 'tidymodels' providing methods spatial objects, models metrics specific SDMs, well number specialised functions process occurrences contemporary palaeo datasets. full functionalities package described Leonardi et al. 
(2023) doi:10.1101/2023.07.24.550358 .","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tidysdm-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"tidysdm: Species Distribution Models with Tidymodels — tidysdm-package","text":"Maintainer: Andrea Manica am315@cam.ac.uk Authors: Michela Leonardi Margherita Colucci Andrea Vittorio Pozzi Eleanor M.L. Scerri","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss.html","id":null,"dir":"Reference","previous_headings":"","what":"TSS - True Skill Statistics — tss","title":"TSS - True Skill Statistics — tss","text":"True Skill Statistic, defined ","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"TSS - True Skill Statistics — tss","text":"","code":"tss(data, ...) # S3 method for class 'data.frame' tss( data, truth, estimate, estimator = NULL, na_rm = TRUE, case_weights = NULL, event_level = \"first\", ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"TSS - True Skill Statistics — tss","text":"data Either data.frame containing columns specified truth estimate arguments, table/matrix true class results columns table. ... currently used. truth column identifier true class results (factor). unquoted column name although argument passed expression supports quasiquotation (can unquote column names). _vec() functions, factor vector. estimate column identifier predicted class results (also factor). truth can specified different ways primary method use unquoted variable name. _vec() functions, factor vector. estimator One : \"binary\", \"macro\", \"macro_weighted\", \"micro\" specify type averaging done. \"binary\" relevant two class case. three general methods calculating multiclass metrics. default automatically choose \"binary\" \"macro\" based estimate. na_rm logical value indicating whether NA values stripped computation proceeds. case_weights optional column identifier case weights. unquoted column name evaluates numeric column data. _vec() functions, numeric vector. event_level single string. Either \"first\" \"second\" specify level truth consider \"event\". argument applicable estimator = \"binary\". default \"first\".","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"TSS - True Skill Statistics — tss","text":"tibble columns .metric, .estimator, .estimate 1 row values. grouped data frames, number rows returned number groups.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"TSS - True Skill Statistics — tss","text":"sensitivity+specificity-1. function wrapper around yardstick::j_index(), another name quantity. Note function takes classes predicted model without calibration (.e. making split 0.5 probability). 
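Tying together thin_by_dist_time() and y2d() from the reference entries above, a hedged sketch for palaeo occurrences with a hypothetical time_bp column; pastclim::ybp2date is one way to map years BP onto lubridate dates.

```r
library(tidysdm)
# thin to 100 km in space and roughly 2000 years in time;
# y2d() converts years to days while accounting for leap years
thinned <- thin_by_dist_time(
  fossil_presences, dist_min = 100000, interval_min = y2d(2000),
  time_col = "time_bp", lubridate_fun = pastclim::ybp2date
)
```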
usually not metric used Species Distribution Models, threshold recalibrated maximise TSS; purpose, use tss_max().","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"TSS - True Skill Statistics — tss","text":"","code":"# Two class data(\"two_class_example\") tss(two_class_example, truth, predicted) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 tss binary 0.673 # Multiclass library(dplyr) data(hpc_cv) # Groups are respected hpc_cv %>% group_by(Resample) %>% tss(obs, pred) #> # A tibble: 10 × 4 #> Resample .metric .estimator .estimate #> #> 1 Fold01 tss macro 0.434 #> 2 Fold02 tss macro 0.422 #> 3 Fold03 tss macro 0.533 #> 4 Fold04 tss macro 0.449 #> 5 Fold05 tss macro 0.431 #> 6 Fold06 tss macro 0.413 #> 7 Fold07 tss macro 0.398 #> 8 Fold08 tss macro 0.468 #> 9 Fold09 tss macro 0.435 #> 10 Fold10 tss macro 0.412"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Maximum TSS - True Skill Statistics — tss_max","title":"Maximum TSS - True Skill Statistics — tss_max","text":"True Skill Statistic, defined ","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Maximum TSS - True Skill Statistics — tss_max","text":"","code":"tss_max(data, ...) # S3 method for class 'data.frame' tss_max( data, truth, ..., estimator = NULL, na_rm = TRUE, event_level = \"first\", case_weights = NULL ) # S3 method for class 'sf' tss_max(data, ...) tss_max_vec( truth, estimate, estimator = NULL, na_rm = TRUE, event_level = \"first\", case_weights = NULL, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Maximum TSS - True Skill Statistics — tss_max","text":"data Either data.frame containing columns specified truth estimate arguments, table/matrix true class results columns table. ... set unquoted column names one dplyr selector functions choose variables contain class probabilities. truth binary, 1 column selected, correspond value event_level. Otherwise, many columns factor levels truth ordering columns factor levels truth. truth column identifier true class results (factor). unquoted column name although argument passed expression supports quasiquotation (can unquote column names). _vec() functions, factor vector. estimator One \"binary\", \"hand_till\", \"macro\", \"macro_weighted\" specify type averaging done. \"binary\" relevant two class case. others general methods calculating multiclass metrics. default automatically choose \"binary\" truth binary, \"hand_till\" truth >2 levels case_weights not specified, \"macro\" truth >2 levels case_weights specified (case \"hand_till\" not well-defined). na_rm logical value indicating whether NA values stripped computation proceeds. event_level single string. Either \"first\" \"second\" specify level truth consider \"event\". argument applicable estimator = \"binary\". default uses internal helper generally defaults \"first\" case_weights optional column identifier case weights. unquoted column name evaluates numeric column data. _vec() functions, numeric vector. estimate truth binary, numeric vector class probabilities corresponding \"relevant\" class. Otherwise, matrix many columns factor levels truth. 
assumed order levels truth.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Maximum TSS - True Skill Statistics — tss_max","text":"tibble columns .metric, .estimator, .estimate 1 row values. grouped data frames, number rows returned number groups.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss_max.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Maximum TSS - True Skill Statistics — tss_max","text":"sensitivity+specificity-1. function calibrates probability threshold classify presences maximise TSS. no multiclass version function, operates binary predictions (e.g. presences absences SDMs).","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss_max.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Maximum TSS - True Skill Statistics — tss_max","text":"","code":"tss_max(two_class_example, truth, Class1) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 tss_max binary 0.728"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/y2d.html","id":null,"dir":"Reference","previous_headings":"","what":"Convert a time interval from years to days — y2d","title":"Convert a time interval from years to days — y2d","text":"function takes time interval years converts days, unit commonly used time operations R. simple conversion x * 365 not work large number years, due presence leap years.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/y2d.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Convert a time interval from years to days — y2d","text":"","code":"y2d(x)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/y2d.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Convert a time interval from years to days — y2d","text":"x number years interval","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/y2d.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Convert a time interval from years to days — y2d","text":"difftime object (days)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/y2d.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Convert a time interval from years to days — y2d","text":"","code":"y2d(1) #> Time difference of 365 days y2d(1000) #> Time difference of 365243 days"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/news/index.html","id":"tidysdm-095","dir":"Changelog","previous_headings":"","what":"tidysdm 0.9.5","title":"tidysdm 0.9.5","text":"CRAN release: 2024-06-23 implement clamping MESS manage extrapolation clearly separate sampling background vs pseudo-absences update vignettes","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/news/index.html","id":"tidysdm-094","dir":"Changelog","previous_headings":"","what":"tidysdm 0.9.4","title":"tidysdm 0.9.4","text":"CRAN release: 2024-03-05 fix predict* functions prevented fixed threshold used assign classes ensure compatibility upcoming changes tune","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/news/index.html","id":"tidysdm-093","dir":"Changelog","previous_headings":"","what":"tidysdm 0.9.3","title":"tidysdm 0.9.3","text":"CRAN release: 2024-01-17 fix bug filter_high_cor due 
changes terra 1.6.75 implement collect_metrics ensembles.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/news/index.html","id":"tidysdm-092","dir":"Changelog","previous_headings":"","what":"tidysdm 0.9.2","title":"tidysdm 0.9.2","text":"CRAN release: 2023-11-13 Release CRAN","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/news/index.html","id":"tidysdm-091","dir":"Changelog","previous_headings":"","what":"tidysdm 0.9.1","title":"tidysdm 0.9.1","text":"Add spatial_recipe class. BREAKING change makes previously saved objects unusable, old code not work expected. Additional articles showing use additional tidymodels features, debug errors. Integration DALEX explain models. functions select variables.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/news/index.html","id":"tidysdm-090","dir":"Changelog","previous_headings":"","what":"tidysdm 0.9.0","title":"tidysdm 0.9.0","text":"Initial release GitHub.","code":""}]
+[{"path":"https://evolecolgroup.github.io/tidysdm/dev/CODE_OF_CONDUCT.html","id":null,"dir":"","previous_headings":"","what":"Contributor Code of Conduct","title":"Contributor Code of Conduct","text":"contributors maintainers project, pledge respect people contribute reporting issues, posting feature requests, updating documentation, submitting pull requests patches, activities. committed making participation project harassment-free experience everyone, regardless level experience, gender, gender identity expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion. Examples unacceptable behavior participants include use sexual language imagery, derogatory comments personal attacks, trolling, public private harassment, insults, unprofessional conduct. Project maintainers right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct. Project maintainers not follow Code Conduct may removed project team. Instances abusive, harassing, otherwise unacceptable behavior may reported opening issue contacting one project maintainers. Code Conduct adapted Contributor Covenant (https://www.contributor-covenant.org), version 1.0.0, available https://contributor-covenant.org/version/1/0/0/.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to tidysdm","title":"Contributing to tidysdm","text":"document outlines contribute development tidysdm. package maintained voluntary basis, help always appreciated.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/CONTRIBUTING.html","id":"the-basic-process-of-contributing","dir":"","previous_headings":"","what":"The basic process of contributing","title":"Contributing to tidysdm","text":"Development work tidysdm occurs dev branch. , want propose changes, work dev. Start forking project onto github repository, make changes directly fork (either dev branch, make custom branch). updating documentation checking tests pass (see ), start Pull Request. proposed changes reviewed, might asked fix/improve code. can iterative process, requiring rounds revision depending complexity code. Functions documented using roxygen. changes affects documentation , rebuild . root directory package, simply run: implemented new functionality, patched bug, consider whether add appropriate unit test. tidysdm uses testthat framework unit tests. make sure tests work : Finally, submit pull request, check changes don't break build. 
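As a sketch of the testthat convention described in this guide (the file name is hypothetical), a single test file can be run on its own while iterating on a fix, before running the full suite:

```r
# run one unit-test file during development, then the whole suite
testthat::test_file("tests/testthat/test_my_function.R")
devtools::test()
```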
can check, also builds vignette runs tests: Make sure resolved warnings notes raised devtools::check()! followed 3 steps, ready make Pull Request. changes go automatic continuous integration, check impact changes multiple platforms. everything goes well, see green tick submission.","code":"devtools::document() devtools::test() devtools::check()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/CONTRIBUTING.html","id":"fixing-typos","dir":"","previous_headings":"","what":"Fixing typos","title":"Contributing to tidysdm","text":"spot typos, spelling mistakes, grammatical errors documentation, fix directly file describes function. .R file R directory, not .Rd file man directory. .Rd files automatically generated roxygen2, not edited hand. recommend study first roxygen2 comments work.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/CONTRIBUTING.html","id":"functional-changes","dir":"","previous_headings":"","what":"Functional changes","title":"Contributing to tidysdm","text":"want make change impacts functioning tidysdm, 's good idea first file issue explaining mind. change meant fix bug, add minimal reprex. good reprex also perfect starting point writing unit test, accompany functional change code. Unit tests also essential fixing bugs, can demonstrate fix work, prevent future changes undoing work. unit testing, use testthat; find tests tests, file dedicated function, following convention test_my_function.R naming files. creating tests, try make use built-datasets, rather adding data files package. Ideally, body Pull Request include phrase Fixes #issue-number, issue_number number Github. way, Pull Request automatically linked issue, issue closed Pull Request merged . user-facing changes, add bullet top NEWS.md (.e. just first header). Follow style described https://style.tidyverse.org/news.html. continuous integration checks Pull Request reduce test coverage.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/CONTRIBUTING.html","id":"code-style","dir":"","previous_headings":"Functional changes","what":"Code style","title":"Contributing to tidysdm","text":"New code follow tidyverse style guide. can use styler package apply styles, please don't restyle code nothing PR. Lots commenting code helps maintainability; , doubt, always add explanation new code.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Contributing to tidysdm","text":"Please note tidyverse project released Contributor Code Conduct. contributing project agree abide terms.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"GNU Affero General Public License","title":"GNU Affero General Public License","text":"Version 3, 19 November 2007 Copyright (C) 2007 Free Software Foundation, Inc.  Everyone permitted copy distribute verbatim copies license document, changing allowed.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"preamble","dir":"","previous_headings":"","what":"Preamble","title":"GNU Affero General Public License","text":"GNU Affero General Public License free, copyleft license software kinds works, specifically designed ensure cooperation community case network server software. licenses software practical works designed take away freedom share change works. 
contrast, General Public Licenses intended guarantee freedom share change versions program–make sure remains free software users. speak free software, referring freedom, price. General Public Licenses designed make sure freedom distribute copies free software (charge wish), receive source code can get want , can change software use pieces new free programs, know can things. Developers use General Public Licenses protect rights two steps: (1) assert copyright software, (2) offer License gives legal permission copy, distribute /modify software. secondary benefit defending users’ freedom improvements made alternate versions program, receive widespread use, become available developers incorporate. Many developers free software heartened encouraged resulting cooperation. However, case software used network servers, result may fail come . GNU General Public License permits making modified version letting public access server without ever releasing source code public. GNU Affero General Public License designed specifically ensure , cases, modified source code becomes available community. requires operator network server provide source code modified version running users server. Therefore, public use modified version, publicly accessible server, gives public access source code modified version. older license, called Affero General Public License published Affero, designed accomplish similar goals. different license, version Affero GPL, Affero released new version Affero GPL permits relicensing license. precise terms conditions copying, distribution modification follow.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_0-definitions","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"0. Definitions.","title":"GNU Affero General Public License","text":"“License” refers version 3 GNU Affero General Public License. “Copyright” also means copyright-like laws apply kinds works, semiconductor masks. “Program” refers copyrightable work licensed License. licensee addressed “”. “Licensees” “recipients” may individuals organizations. “modify” work means copy adapt part work fashion requiring copyright permission, making exact copy. resulting work called “modified version” earlier work work “based ” earlier work. “covered work” means either unmodified Program work based Program. “propagate” work means anything , without permission, make directly secondarily liable infringement applicable copyright law, except executing computer modifying private copy. Propagation includes copying, distribution (without modification), making available public, countries activities well. “convey” work means kind propagation enables parties make receive copies. Mere interaction user computer network, transfer copy, conveying. interactive user interface displays “Appropriate Legal Notices” extent includes convenient prominently visible feature (1) displays appropriate copyright notice, (2) tells user warranty work (except extent warranties provided), licensees may convey work License, view copy License. interface presents list user commands options, menu, prominent item list meets criterion.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_1-source-code","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"1. Source Code.","title":"GNU Affero General Public License","text":"“source code” work means preferred form work making modifications . “Object code” means non-source form work. 
“Standard Interface” means interface either official standard defined recognized standards body, , case interfaces specified particular programming language, one widely used among developers working language. “System Libraries” executable work include anything, work whole, () included normal form packaging Major Component, part Major Component, (b) serves enable use work Major Component, implement Standard Interface implementation available public source code form. “Major Component”, context, means major essential component (kernel, window system, ) specific operating system () executable work runs, compiler used produce work, object code interpreter used run . “Corresponding Source” work object code form means source code needed generate, install, (executable work) run object code modify work, including scripts control activities. However, include work’s System Libraries, general-purpose tools generally available free programs used unmodified performing activities part work. example, Corresponding Source includes interface definition files associated source files work, source code shared libraries dynamically linked subprograms work specifically designed require, intimate data communication control flow subprograms parts work. Corresponding Source need include anything users can regenerate automatically parts Corresponding Source. Corresponding Source work source code form work.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_2-basic-permissions","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"2. Basic Permissions.","title":"GNU Affero General Public License","text":"rights granted License granted term copyright Program, irrevocable provided stated conditions met. License explicitly affirms unlimited permission run unmodified Program. output running covered work covered License output, given content, constitutes covered work. License acknowledges rights fair use equivalent, provided copyright law. may make, run propagate covered works convey, without conditions long license otherwise remains force. may convey covered works others sole purpose make modifications exclusively , provide facilities running works, provided comply terms License conveying material control copyright. thus making running covered works must exclusively behalf, direction control, terms prohibit making copies copyrighted material outside relationship . Conveying circumstances permitted solely conditions stated . Sublicensing allowed; section 10 makes unnecessary.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_3-protecting-users-legal-rights-from-anti-circumvention-law","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"3. Protecting Users’ Legal Rights From Anti-Circumvention Law.","title":"GNU Affero General Public License","text":"covered work shall deemed part effective technological measure applicable law fulfilling obligations article 11 WIPO copyright treaty adopted 20 December 1996, similar laws prohibiting restricting circumvention measures. 
convey covered work, waive legal power forbid circumvention technological measures extent circumvention effected exercising rights License respect covered work, disclaim intention limit operation modification work means enforcing, work’s users, third parties’ legal rights forbid circumvention technological measures.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_4-conveying-verbatim-copies","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"4. Conveying Verbatim Copies.","title":"GNU Affero General Public License","text":"may convey verbatim copies Program’s source code receive , medium, provided conspicuously appropriately publish copy appropriate copyright notice; keep intact notices stating License non-permissive terms added accord section 7 apply code; keep intact notices absence warranty; give recipients copy License along Program. may charge price price copy convey, may offer support warranty protection fee.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_5-conveying-modified-source-versions","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"5. Conveying Modified Source Versions.","title":"GNU Affero General Public License","text":"may convey work based Program, modifications produce Program, form source code terms section 4, provided also meet conditions: work must carry prominent notices stating modified , giving relevant date. work must carry prominent notices stating released License conditions added section 7. requirement modifies requirement section 4 “keep intact notices”. must license entire work, whole, License anyone comes possession copy. License therefore apply, along applicable section 7 additional terms, whole work, parts, regardless packaged. License gives permission license work way, invalidate permission separately received . work interactive user interfaces, must display Appropriate Legal Notices; however, Program interactive interfaces display Appropriate Legal Notices, work need make . compilation covered work separate independent works, nature extensions covered work, combined form larger program, volume storage distribution medium, called “aggregate” compilation resulting copyright used limit access legal rights compilation’s users beyond individual works permit. Inclusion covered work aggregate cause License apply parts aggregate.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_6-conveying-non-source-forms","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"6. Conveying Non-Source Forms.","title":"GNU Affero General Public License","text":"may convey covered work object code form terms sections 4 5, provided also convey machine-readable Corresponding Source terms License, one ways: Convey object code , embodied , physical product (including physical distribution medium), accompanied Corresponding Source fixed durable physical medium customarily used software interchange. Convey object code , embodied , physical product (including physical distribution medium), accompanied written offer, valid least three years valid long offer spare parts customer support product model, give anyone possesses object code either (1) copy Corresponding Source software product covered License, durable physical medium customarily used software interchange, price reasonable cost physically performing conveying source, (2) access copy Corresponding Source network server charge. 
Convey individual copies object code copy written offer provide Corresponding Source. alternative allowed occasionally noncommercially, received object code offer, accord subsection 6b. Convey object code offering access designated place (gratis charge), offer equivalent access Corresponding Source way place charge. need require recipients copy Corresponding Source along object code. place copy object code network server, Corresponding Source may different server (operated third party) supports equivalent copying facilities, provided maintain clear directions next object code saying find Corresponding Source. Regardless server hosts Corresponding Source, remain obligated ensure available long needed satisfy requirements. Convey object code using peer--peer transmission, provided inform peers object code Corresponding Source work offered general public charge subsection 6d. separable portion object code, whose source code excluded Corresponding Source System Library, need included conveying object code work. “User Product” either (1) “consumer product”, means tangible personal property normally used personal, family, household purposes, (2) anything designed sold incorporation dwelling. determining whether product consumer product, doubtful cases shall resolved favor coverage. particular product received particular user, “normally used” refers typical common use class product, regardless status particular user way particular user actually uses, expects expected use, product. product consumer product regardless whether product substantial commercial, industrial non-consumer uses, unless uses represent significant mode use product. “Installation Information” User Product means methods, procedures, authorization keys, information required install execute modified versions covered work User Product modified version Corresponding Source. information must suffice ensure continued functioning modified object code case prevented interfered solely modification made. convey object code work section , , specifically use , User Product, conveying occurs part transaction right possession use User Product transferred recipient perpetuity fixed term (regardless transaction characterized), Corresponding Source conveyed section must accompanied Installation Information. requirement apply neither third party retains ability install modified object code User Product (example, work installed ROM). requirement provide Installation Information include requirement continue provide support service, warranty, updates work modified installed recipient, User Product modified installed. Access network may denied modification materially adversely affects operation network violates rules protocols communication across network. Corresponding Source conveyed, Installation Information provided, accord section must format publicly documented (implementation available public source code form), must require special password key unpacking, reading copying.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_7-additional-terms","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"7. Additional Terms.","title":"GNU Affero General Public License","text":"“Additional permissions” terms supplement terms License making exceptions one conditions. Additional permissions applicable entire Program shall treated though included License, extent valid applicable law. 
additional permissions apply part Program, part may used separately permissions, entire Program remains governed License without regard additional permissions. convey copy covered work, may option remove additional permissions copy, part . (Additional permissions may written require removal certain cases modify work.) may place additional permissions material, added covered work, can give appropriate copyright permission. Notwithstanding provision License, material add covered work, may (authorized copyright holders material) supplement terms License terms: Disclaiming warranty limiting liability differently terms sections 15 16 License; Requiring preservation specified reasonable legal notices author attributions material Appropriate Legal Notices displayed works containing ; Prohibiting misrepresentation origin material, requiring modified versions material marked reasonable ways different original version; Limiting use publicity purposes names licensors authors material; Declining grant rights trademark law use trade names, trademarks, service marks; Requiring indemnification licensors authors material anyone conveys material (modified versions ) contractual assumptions liability recipient, liability contractual assumptions directly impose licensors authors. non-permissive additional terms considered “restrictions” within meaning section 10. Program received , part , contains notice stating governed License along term restriction, may remove term. license document contains restriction permits relicensing conveying License, may add covered work material governed terms license document, provided restriction survive relicensing conveying. add terms covered work accord section, must place, relevant source files, statement additional terms apply files, notice indicating find applicable terms. Additional terms, permissive non-permissive, may stated form separately written license, stated exceptions; requirements apply either way.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_8-termination","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"8. Termination.","title":"GNU Affero General Public License","text":"may propagate modify covered work except expressly provided License. attempt otherwise propagate modify void, automatically terminate rights License (including patent licenses granted third paragraph section 11). However, cease violation License, license particular copyright holder reinstated () provisionally, unless copyright holder explicitly finally terminates license, (b) permanently, copyright holder fails notify violation reasonable means prior 60 days cessation. Moreover, license particular copyright holder reinstated permanently copyright holder notifies violation reasonable means, first time received notice violation License (work) copyright holder, cure violation prior 30 days receipt notice. Termination rights section terminate licenses parties received copies rights License. rights terminated permanently reinstated, qualify receive new licenses material section 10.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_9-acceptance-not-required-for-having-copies","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"9. Acceptance Not Required for Having Copies.","title":"GNU Affero General Public License","text":"required accept License order receive run copy Program. 
Ancillary propagation covered work occurring solely consequence using peer--peer transmission receive copy likewise require acceptance. However, nothing License grants permission propagate modify covered work. actions infringe copyright accept License. Therefore, modifying propagating covered work, indicate acceptance License .","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_10-automatic-licensing-of-downstream-recipients","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"10. Automatic Licensing of Downstream Recipients.","title":"GNU Affero General Public License","text":"time convey covered work, recipient automatically receives license original licensors, run, modify propagate work, subject License. responsible enforcing compliance third parties License. “entity transaction” transaction transferring control organization, substantially assets one, subdividing organization, merging organizations. propagation covered work results entity transaction, party transaction receives copy work also receives whatever licenses work party’s predecessor interest give previous paragraph, plus right possession Corresponding Source work predecessor interest, predecessor can get reasonable efforts. may impose restrictions exercise rights granted affirmed License. example, may impose license fee, royalty, charge exercise rights granted License, may initiate litigation (including cross-claim counterclaim lawsuit) alleging patent claim infringed making, using, selling, offering sale, importing Program portion .","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_11-patents","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"11. Patents.","title":"GNU Affero General Public License","text":"“contributor” copyright holder authorizes use License Program work Program based. work thus licensed called contributor’s “contributor version”. contributor’s “essential patent claims” patent claims owned controlled contributor, whether already acquired hereafter acquired, infringed manner, permitted License, making, using, selling contributor version, include claims infringed consequence modification contributor version. purposes definition, “control” includes right grant patent sublicenses manner consistent requirements License. contributor grants non-exclusive, worldwide, royalty-free patent license contributor’s essential patent claims, make, use, sell, offer sale, import otherwise run, modify propagate contents contributor version. following three paragraphs, “patent license” express agreement commitment, however denominated, enforce patent (express permission practice patent covenant sue patent infringement). “grant” patent license party means make agreement commitment enforce patent party. convey covered work, knowingly relying patent license, Corresponding Source work available anyone copy, free charge terms License, publicly available network server readily accessible means, must either (1) cause Corresponding Source available, (2) arrange deprive benefit patent license particular work, (3) arrange, manner consistent requirements License, extend patent license downstream recipients. “Knowingly relying” means actual knowledge , patent license, conveying covered work country, recipient’s use covered work country, infringe one identifiable patents country reason believe valid. 
, pursuant connection single transaction arrangement, convey, propagate procuring conveyance , covered work, grant patent license parties receiving covered work authorizing use, propagate, modify convey specific copy covered work, patent license grant automatically extended recipients covered work works based . patent license “discriminatory” include within scope coverage, prohibits exercise , conditioned non-exercise one rights specifically granted License. may convey covered work party arrangement third party business distributing software, make payment third party based extent activity conveying work, third party grants, parties receive covered work , discriminatory patent license () connection copies covered work conveyed (copies made copies), (b) primarily connection specific products compilations contain covered work, unless entered arrangement, patent license granted, prior 28 March 2007. Nothing License shall construed excluding limiting implied license defenses infringement may otherwise available applicable patent law.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_12-no-surrender-of-others-freedom","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"12. No Surrender of Others’ Freedom.","title":"GNU Affero General Public License","text":"conditions imposed (whether court order, agreement otherwise) contradict conditions License, excuse conditions License. convey covered work satisfy simultaneously obligations License pertinent obligations, consequence may convey . example, agree terms obligate collect royalty conveying convey Program, way satisfy terms License refrain entirely conveying Program.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_13-remote-network-interaction-use-with-the-gnu-general-public-license","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"13. Remote Network Interaction; Use with the GNU General Public License.","title":"GNU Affero General Public License","text":"Notwithstanding provision License, modify Program, modified version must prominently offer users interacting remotely computer network (version supports interaction) opportunity receive Corresponding Source version providing access Corresponding Source network server charge, standard customary means facilitating copying software. Corresponding Source shall include Corresponding Source work covered version 3 GNU General Public License incorporated pursuant following paragraph. Notwithstanding provision License, permission link combine covered work work licensed version 3 GNU General Public License single combined work, convey resulting work. terms License continue apply part covered work, work combined remain governed version 3 GNU General Public License.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_14-revised-versions-of-this-license","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"14. Revised Versions of this License.","title":"GNU Affero General Public License","text":"Free Software Foundation may publish revised /new versions GNU Affero General Public License time time. new versions similar spirit present version, may differ detail address new problems concerns. version given distinguishing version number. Program specifies certain numbered version GNU Affero General Public License “later version” applies , option following terms conditions either numbered version later version published Free Software Foundation. 
Program specify version number GNU Affero General Public License, may choose version ever published Free Software Foundation. Program specifies proxy can decide future versions GNU Affero General Public License can used, proxy’s public statement acceptance version permanently authorizes choose version Program. Later license versions may give additional different permissions. However, additional obligations imposed author copyright holder result choosing follow later version.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_15-disclaimer-of-warranty","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"15. Disclaimer of Warranty.","title":"GNU Affero General Public License","text":"WARRANTY PROGRAM, EXTENT PERMITTED APPLICABLE LAW. EXCEPT OTHERWISE STATED WRITING COPYRIGHT HOLDERS /PARTIES PROVIDE PROGRAM “” WITHOUT WARRANTY KIND, EITHER EXPRESSED IMPLIED, INCLUDING, LIMITED , IMPLIED WARRANTIES MERCHANTABILITY FITNESS PARTICULAR PURPOSE. ENTIRE RISK QUALITY PERFORMANCE PROGRAM . PROGRAM PROVE DEFECTIVE, ASSUME COST NECESSARY SERVICING, REPAIR CORRECTION.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_16-limitation-of-liability","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"16. Limitation of Liability.","title":"GNU Affero General Public License","text":"EVENT UNLESS REQUIRED APPLICABLE LAW AGREED WRITING COPYRIGHT HOLDER, PARTY MODIFIES /CONVEYS PROGRAM PERMITTED , LIABLE DAMAGES, INCLUDING GENERAL, SPECIAL, INCIDENTAL CONSEQUENTIAL DAMAGES ARISING USE INABILITY USE PROGRAM (INCLUDING LIMITED LOSS DATA DATA RENDERED INACCURATE LOSSES SUSTAINED THIRD PARTIES FAILURE PROGRAM OPERATE PROGRAMS), EVEN HOLDER PARTY ADVISED POSSIBILITY DAMAGES.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"id_17-interpretation-of-sections-15-and-16","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"17. Interpretation of Sections 15 and 16.","title":"GNU Affero General Public License","text":"disclaimer warranty limitation liability provided given local legal effect according terms, reviewing courts shall apply local law closely approximates absolute waiver civil liability connection Program, unless warranty assumption liability accompanies copy Program return fee. END TERMS CONDITIONS","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/LICENSE.html","id":"how-to-apply-these-terms-to-your-new-programs","dir":"","previous_headings":"","what":"How to Apply These Terms to Your New Programs","title":"GNU Affero General Public License","text":"develop new program, want greatest possible use public, best way achieve make free software everyone can redistribute change terms. , attach following notices program. safest attach start source file effectively state exclusion warranty; file least “copyright” line pointer full notice found. Also add information contact electronic paper mail. software can interact users remotely computer network, also make sure provides way users get source. example, program web application, interface display “Source” link leads users archive code. many ways offer source, different solutions better different programs; see section 13 specific requirements. also get employer (work programmer) school, , sign “copyright disclaimer” program, necessary. 
information , apply follow GNU AGPL, see https://www.gnu.org/licenses/.","code":" Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see ."},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"sdms-with-tidymodels","dir":"Articles","previous_headings":"","what":"SDMs with tidymodels","title":"tidysdm overview","text":"Species Distribution Modelling relies several algorithms, many number hyperparameters require tuning. tidymodels universe includes number packages specifically designed fit, tune validate models. advantage tidymodels models syntax results returned users standardised, thus providing coherent interface modelling. Given variety models required SDM, tidymodels ideal framework. tidysdm provides number wrappers specialised functions facilitate fitting SDM tidymodels. article provides overview tidysdm facilitates fitting SDMs. articles, detailing use package palaeodata, fitting complex models troubleshoot models can found tidysdm website. tidysdm relies tidymodels, users advised familiarise introductory tutorials tidymodels website. load tidysdm, automatically loads tidymodels associated packages necessary fit models:","code":"library(tidysdm) #> Loading required package: tidymodels #> ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ── #> ✔ broom 1.0.7 ✔ recipes 1.1.0 #> ✔ dials 1.3.0 ✔ rsample 1.2.1 #> ✔ dplyr 1.1.4 ✔ tibble 3.2.1 #> ✔ ggplot2 3.5.1 ✔ tidyr 1.3.1 #> ✔ infer 1.0.7 ✔ tune 1.2.1 #> ✔ modeldata 1.4.0 ✔ workflows 1.1.4 #> ✔ parsnip 1.2.1 ✔ workflowsets 1.1.0 #> ✔ purrr 1.0.2 ✔ yardstick 1.3.1 #> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ── #> ✖ purrr::discard() masks scales::discard() #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() #> ✖ recipes::step() masks stats::step() #> • Use suppressPackageStartupMessages() to eliminate package startup messages #> Loading required package: spatialsample"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"accessing-the-data-for-this-vignette-how-to-use-rgbif","dir":"Articles","previous_headings":"SDMs with tidymodels","what":"Accessing the data for this vignette: how to use rgbif","title":"tidysdm overview","text":"start reading set presences species lizard inhabits Iberian peninsula, Lacerta schreiberi. data taken GBIF Occurrence Download (6 July 2023) https://doi.org/10.15468/dl.srq3b3. dataset already included tidysdm package: Alternatively, can easily access manipulate dataset using rgbif. Note data GBIF often requires level cleaning. 
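A download key of this kind can also be requested directly from R with rgbif; as a rough, hedged sketch (the taxonKey and credentials below are placeholders, not the ones used for this vignette, which relies on a key generated beforehand on gbif.org):

# hypothetical sketch: requesting a GBIF download key with rgbif
# (placeholder taxonKey and credentials; requires a GBIF account)
library(rgbif)
key <- occ_download(
  pred("taxonKey", 1234567),    # placeholder taxon key for the species
  pred("hasCoordinate", TRUE),  # keep only georeferenced records
  format = "SIMPLE_CSV",
  user = "gbif_user", pwd = "gbif_pwd", email = "me@example.org"
)
occ_download_wait(key)  # poll GBIF until the download is ready to fetch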
use simple cleaning function CoordinateCleaner; general, recommend inspect data flagged problematic, rather just accepting :","code":"data(lacerta) head(lacerta) #> # A tibble: 6 × 3 #> ID latitude longitude #> #> 1 858029749 42.6 -7.09 #> 2 858029738 42.6 -7.09 #> 3 614631090 41.4 -7.90 #> 4 614631085 41.3 -7.81 #> 5 614631083 41.3 -7.81 #> 6 614631080 41.4 -7.83 # download presences library(rgbif) occ_download_get(key = \"0068808-230530130749713\", path = tempdir()) # read file library(readr) distrib <- read_delim(file.path(tempdir(), \"0068808-230530130749713.zip\")) # keep the necessary columns and rename them lacerta <- distrib %>% select(gbifID, decimalLatitude, decimalLongitude) %>% rename(ID = gbifID, latitude = decimalLatitude, longitude = decimalLongitude) # clean up the data library(CoordinateCleaner) lacerta <- clean_coordinates(x = lacerta, lon = \"longitude\", lat = \"latitude\", species = \"ID\", value = \"clean\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"preparing-your-data","dir":"Articles","previous_headings":"","what":"Preparing your data","title":"tidysdm overview","text":"First, let us visualise presences plotting map. tidysdm works sf objects represent locations, cast coordinates sf object, set projections standard ‘lonlat’ (crs = 4326). usually advisable plot locations directly raster used extract climatic variables, see locations fall within discrete space raster. vignette, use WorldClim source climatic information. access WorldClim data via library pastclim; even though library, name suggests, mostly designed handle palaeoclimatic reconstructions, also provides convenient functions access present day reconstructions future projections. pastclim handy function get land mask available datasets, can use background locations. cut raster Iberian peninsula, lizard lives. 
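Rather than accepting the cleaned output wholesale, the flagged records can be inspected first; a minimal sketch, assuming the lacerta data frame from the chunk above and using value = "flagged" (which returns a logical vector, TRUE for records that pass all tests):

# minimal sketch: inspect flagged records before dropping them
library(CoordinateCleaner)
flags <- clean_coordinates(
  x = lacerta, lon = "longitude", lat = "latitude",
  species = "ID", value = "flagged"
)
summary(flags)                     # how many records fail at least one test
problem_recs <- lacerta[!flags, ]  # examine these before discarding them
lacerta <- lacerta[flags, ]        # keep only records that pass all tests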
simply illustration, bother project raster, equal area projection desirable… plotting, take advantage tidyterra, makes handling terra rasters ggplot breeze.","code":"library(sf) #> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE lacerta <- st_as_sf(lacerta, coords = c(\"longitude\", \"latitude\")) st_crs(lacerta) <- 4326 library(pastclim) download_dataset(dataset = \"WorldClim_2.1_10m\") land_mask <- get_land_mask(time_ce = 1985, dataset = \"WorldClim_2.1_10m\") # Iberia peninsula extension iberia_poly <- terra::vect( \"POLYGON((-9.8 43.3,-7.8 44.1,-2.0 43.7,3.6 42.5,3.8 41.5,1.3 40.8,0.3 39.5, 0.9 38.6,-0.4 37.5,-1.6 36.7,-2.3 36.3,-4.1 36.4,-4.5 36.4,-5.0 36.1, -5.6 36.0,-6.3 36.0,-7.1 36.9,-9.5 36.6,-9.4 38.0,-10.6 38.9,-9.5 40.8, -9.8 43.3))\" ) crs(iberia_poly) <- \"lonlat\" # crop the extent land_mask <- crop(land_mask, iberia_poly) # and mask to the polygon land_mask <- mask(land_mask, iberia_poly) #> Loading required package: terra #> terra 1.7.83 #> #> Attaching package: 'terra' #> The following object is masked from 'package:tidyr': #> #> extract #> The following object is masked from 'package:scales': #> #> rescale #> [1] TRUE library(tidyterra) #> #> Attaching package: 'tidyterra' #> The following object is masked from 'package:stats': #> #> filter library(ggplot2) ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_1985)) + geom_sf(data = lacerta) + guides(fill=\"none\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"thinning-step","dir":"Articles","previous_headings":"","what":"Thinning step","title":"tidysdm overview","text":"Now, thin observations one per cell raster (better equal area projection…): Now, thin remove points closer 20km. However, note standard map units ‘lonlat’ projection meters. tidysdm provides convenient conversion function, km2m(), avoid write lots zeroes): Let’s see left points: now need select points represent potential available area species. two approaches, can either sample background sample_background(), can generate pseudo-absences sample_pseudoabs(). example, sample background; specifically, attempt account potential sampling biases using target group approach, presences species within taxonomic group used condition sampling background, providing information differential sampling different areas within region interest. start downloading records 8 genera Lacertidae, covering geographic region Iberian peninsula GBIF https://doi.org/10.15468/dl.53js5z: need convert observations raster whose values number records (later used determine likely cell used background point): can see sampling far random, certain locations large number records. can now sample background, using ‘bias’ method represent heterogeneity sampling effort: Let’s see presences background: can use pastclim download WorldClim dataset (’ll use 10 arc-minute resolution) extract bioclimatic variables available (use pastclim, use raster dataset access , loading directly terra). Note dataset covers period 1970-2000, pastclim dates 1985 (midpoint). also cropped directly Iberian peninsula. Note , vignette, focus continuous variables; machine learning algorithms natively cope multi-level factors, possible use recipes::step_dummy() generate dummy variables factors. worked example can found article additional features tidymodels tidysdm. Next, extract climate presences background points: Based paper (https://doi.org/10.1007/s10531-010-9865-2), interested variables: “bio06”, “bio05”, “bio13”, “bio14”, “bio15”. 
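A quick aside on the km2m() helper used in the thinning above: it is a plain unit conversion, e.g.

km2m(20)  # 20000 metres, as in thin_by_dist(dist_min = km2m(20))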
can visualise differences presences background using violin plots: can see variables interest seem different distribution presences background. can formally quantify mismatch two computing overlap: , can see variables interest seem good candidates clear signal. Let us focus variables: Environmental variables often highly correlated, collinearity issue several types models. can inspect correlation among variables : can see variables rather high correlation (e.g. bio05 vs bio14). can subset variables certain threshold correlation (e.g. 0.7) : , removing bio14 leaves us set uncorrelated variables. Note filter_collinear methods based variable inflation also worth exploring. example, remove bio14 work remaining variables.","code":"set.seed(1234567) lacerta <- thin_by_cell(lacerta, raster = land_mask) nrow(lacerta) #> [1] 226 ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_1985)) + geom_sf(data = lacerta) + guides(fill=\"none\") set.seed(1234567) lacerta_thin <- thin_by_dist(lacerta, dist_min = km2m(20)) nrow(lacerta_thin) #> [1] 111 ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_1985)) + geom_sf(data = lacerta_thin) + guides(fill=\"none\") library(rgbif) occ_download_get(key = \"0121761-240321170329656\", path = tempdir()) library(readr) backg_distrib <- readr::read_delim(file.path(tempdir(), \"0121761-240321170329656.zip\")) # keep the necessary columns lacertidae_background <- backg_distrib %>% select(gbifID, decimalLatitude, decimalLongitude) %>% rename(ID = gbifID, latitude = decimalLatitude, longitude = decimalLongitude) # convert to an sf object lacertidae_background <- st_as_sf(lacertidae_background, coords = c(\"longitude\", \"latitude\")) st_crs(lacertidae_background) <- 4326 lacertidae_background_raster <- rasterize(lacertidae_background, land_mask, fun = \"count\") plot(lacertidae_background_raster) set.seed(1234567) lacerta_thin <- sample_background(data = lacerta_thin, raster = lacertidae_background_raster, n = 3 * nrow(lacerta_thin), method = \"bias\", class_label = \"background\", return_pres = TRUE) ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_1985)) + geom_sf(data = lacerta_thin, aes(col = class)) + guides(fill=\"none\") download_dataset(\"WorldClim_2.1_10m\") climate_vars <- get_vars_for_dataset(\"WorldClim_2.1_10m\") climate_present <- pastclim::region_slice( time_ce = 1985, bio_variables = climate_vars, data = \"WorldClim_2.1_10m\", crop = iberia_poly ) lacerta_thin <- lacerta_thin %>% bind_cols(terra::extract(climate_present, lacerta_thin, ID = FALSE)) lacerta_thin %>% plot_pres_vs_bg(class) lacerta_thin %>% dist_pres_vs_bg(class) #> bio09 bio12 bio16 bio19 bio13 bio05 bio10 #> 0.43907819 0.41888524 0.41487381 0.40742724 0.40492411 0.38854703 0.38610145 #> bio02 bio07 bio04 bio08 bio17 bio15 bio18 #> 0.35191109 0.35036167 0.32450555 0.31879785 0.28143659 0.27152095 0.25007068 #> bio01 bio14 bio03 bio11 altitude bio06 #> 0.24589097 0.24294699 0.18414624 0.11169528 0.07271380 0.06742951 suggested_vars <- c(\"bio06\", \"bio05\", \"bio13\", \"bio14\", \"bio15\") pairs(climate_present[[suggested_vars]]) climate_present <- climate_present[[suggested_vars]] vars_uncor <- filter_collinear(climate_present, cutoff = 0.7, method = \"cor_caret\") vars_uncor #> [1] \"bio15\" \"bio05\" \"bio13\" \"bio06\" #> attr(,\"to_remove\") #> [1] \"bio14\" lacerta_thin <- lacerta_thin %>% select(all_of(c(vars_uncor, \"class\"))) climate_present <- climate_present[[vars_uncor]] names(climate_present) # added to highlight which variables are 
retained in the end #> [1] \"bio15\" \"bio05\" \"bio13\" \"bio06\""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"fit-the-model-by-cross-validation","dir":"Articles","previous_headings":"","what":"Fit the model by cross-validation","title":"tidysdm overview","text":"Next, need set recipe define handle dataset. don’t want anything data terms transformations, just need define formula (class outcome, variables predictors; note , sf objects, geometry automatically replaced X Y columns assigned role coords, thus used predictors): classification models tidymodels, assumption level interest response (case, presences) reference level. can confirm data correctly formatted : now build workflow_set different models, defining hyperparameters want tune. use glm, random forest, boosted_trees maxent models (details use workflow_sets, see tutorial). latter three models tunable hyperparameters. commonly used models, tidysdm automatically chooses important parameters, possible fully customise model specifications (e.g. see help sdm_spec_rf). Note , used GAMs sdm_spec_gam(), necessary update model gam_formula() due non-standard formula notation GAMs (see help sdm_spec_gam() example ). now want set spatial block cross-validation scheme tune assess models. split data creating 3 folds. use spatial_block_cv function package spatialsample. spatialsample offers number sampling approaches spatial data; also possible convert objects created blockCV (offers features spatial sampling, stratified sampling) rsample object suitable tidysdm function blockcv2rsample. can now use block CV folds tune assess models (keep computations fast, explore 3 combination hyperparameters per model; far little real life!): Note workflow_set correctly detects tuning parameters glm. can look performance models : Now let’s create ensemble, selecting best set parameters model (really relevant random forest, hyperparameters tune glm gam). use Boyce continuous index metric choose best random forest boosted tree. adding members ensemble, automatically fitted full training dataset, ready make predictions. visualise tabular form model metrics can obtained :","code":"lacerta_rec <- recipe(lacerta_thin, formula = class ~ .) lacerta_rec #> #> ── Recipe ────────────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 4 #> coords: 2 lacerta_thin %>% check_sdm_presence(class) #> [1] TRUE lacerta_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_rec), models = list( # the standard glm specs glm = sdm_spec_glm(), # rf specs with tuning rf = sdm_spec_rf(), # boosted tree model (gbm) specs with tuning gbm = sdm_spec_boost_tree(), # maxent specs with tuning maxent = sdm_spec_maxent() ), # make all combinations of preproc and models, cross = TRUE ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) library(tidysdm) set.seed(100) #lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5) lacerta_cv <- spatial_block_cv(data = lacerta_thin, v = 3, n = 5) autoplot(lacerta_cv) set.seed(1234567) lacerta_models <- lacerta_models %>% workflow_map(\"tune_grid\", resamples = lacerta_cv, grid = 3, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. 
`fit_resamples()` will be attempted #> i 1 of 4 resampling: default_glm #> ✔ 1 of 4 resampling: default_glm (196ms) #> i 2 of 4 tuning: default_rf #> i Creating pre-processing data to finalize unknown parameter: mtry #> ✔ 2 of 4 tuning: default_rf (826ms) #> i 3 of 4 tuning: default_gbm #> i Creating pre-processing data to finalize unknown parameter: mtry #> ✔ 3 of 4 tuning: default_gbm (4s) #> i 4 of 4 tuning: default_maxent #> ✔ 4 of 4 tuning: default_maxent (1.2s) autoplot(lacerta_models) lacerta_ensemble <- simple_ensemble() %>% add_member(lacerta_models, metric = \"boyce_cont\") lacerta_ensemble #> A simple_ensemble of models #> #> Members: #> • default_glm #> • default_rf #> • default_gbm #> • default_maxent #> #> Available metrics: #> • boyce_cont #> • roc_auc #> • tss_max #> #> Metric used to tune workflows: #> • boyce_cont autoplot(lacerta_ensemble) lacerta_ensemble %>% collect_metrics() #> # A tibble: 12 × 5 #> wflow_id .metric mean std_err n #> #> 1 default_glm boyce_cont 0.573 0.115 3 #> 2 default_glm roc_auc 0.775 0.0138 3 #> 3 default_glm tss_max 0.486 0.0337 3 #> 4 default_rf boyce_cont 0.709 0.0856 3 #> 5 default_rf roc_auc 0.794 0.00648 3 #> 6 default_rf tss_max 0.537 0.0363 3 #> 7 default_gbm boyce_cont 0.659 0.0472 3 #> 8 default_gbm roc_auc 0.789 0.00707 3 #> 9 default_gbm tss_max 0.524 0.0152 3 #> 10 default_maxent boyce_cont 0.651 0.157 3 #> 11 default_maxent roc_auc 0.804 0.00653 3 #> 12 default_maxent tss_max 0.572 0.0111 3"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"projecting-to-the-present","dir":"Articles","previous_headings":"","what":"Projecting to the present","title":"tidysdm overview","text":"can now make predictions ensemble (using default option taking mean predictions model). can subset ensemble use best models, based Boyce continuous index, setting minimum threshold 0.7 metric. also take median available model predictions (instead mean, default). plot change much (models quite consistent). Sometimes, desirable binary predictions (presence vs absence), rather probability occurrence. 
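Returning for a moment to the model comparison above: beyond autoplot() and collect_metrics(), the workflows can also be ranked; a sketch using workflowsets' rank_results() (attached via tidymodels; not shown in the vignette itself):

# sketch: rank the workflows by the Boyce continuous index
lacerta_models %>%
  rank_results(rank_metric = "boyce_cont") %>%
  filter(.metric == "boyce_cont") %>%
  select(wflow_id, mean, std_err, rank)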
, first need calibrate threshold used convert probabilities classes (case, optimise TSS): now can predict whole continent:","code":"prediction_present <- predict_raster(lacerta_ensemble, climate_present) ggplot() + geom_spatraster(data = prediction_present, aes(fill = mean)) + scale_fill_terrain_c() + # plot presences used in the model geom_sf(data = lacerta_thin %>% filter(class == \"presence\")) prediction_present_boyce <- predict_raster(lacerta_ensemble, climate_present, metric_thresh = c(\"boyce_cont\", 0.7), fun = \"median\" ) ggplot() + geom_spatraster(data = prediction_present_boyce, aes(fill = median)) + scale_fill_terrain_c() + geom_sf(data = lacerta_thin %>% filter(class == \"presence\")) lacerta_ensemble <- calib_class_thresh(lacerta_ensemble, class_thresh = \"tss_max\", metric_thresh = c(\"boyce_cont\", 0.7) ) prediction_present_binary <- predict_raster(lacerta_ensemble, climate_present, type = \"class\", class_thresh = c(\"tss_max\"), metric_thresh = c(\"boyce_cont\", 0.7) ) ggplot() + geom_spatraster(data = prediction_present_binary, aes(fill = binary_mean)) + geom_sf(data = lacerta_thin %>% filter(class == \"presence\")) + scale_fill_discrete(na.value = \"transparent\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"projecting-to-the-future","dir":"Articles","previous_headings":"","what":"Projecting to the future","title":"tidysdm overview","text":"WorldClim wide selection projections future based different models Shared Socio-economic Pathways (SSP). Type help(\"WorldClim_2.1\") full list. use predictions based “HadGEM3-GC31-LL” model SSP 245 (intermediate green house gas emissions) resolution present day data (10 arc-minutes). first download data: Let’s see times available: predict 2090, prediction future available. Let’s now check available variables: Note future predictions include altitude (change time), needed , copy present. However, set uncorrelated variables used earlier, don’t need worry . predict using ensemble:","code":"download_dataset(\"WorldClim_2.1_HadGEM3-GC31-LL_ssp245_10m\") get_time_ce_steps(\"WorldClim_2.1_HadGEM3-GC31-LL_ssp245_10m\") #> [1] 2030 2050 2070 2090 get_vars_for_dataset(\"WorldClim_2.1_HadGEM3-GC31-LL_ssp245_10m\") #> [1] \"bio01\" \"bio02\" \"bio03\" \"bio04\" \"bio05\" \"bio06\" \"bio07\" \"bio08\" \"bio09\" #> [10] \"bio10\" \"bio11\" \"bio12\" \"bio13\" \"bio14\" \"bio15\" \"bio16\" \"bio17\" \"bio18\" #> [19] \"bio19\" climate_future <- pastclim::region_slice( time_ce = 2090, bio_variables = vars_uncor, data = \"WorldClim_2.1_HadGEM3-GC31-LL_ssp245_10m\", crop = iberia_poly ) prediction_future <- predict_raster(lacerta_ensemble, climate_future) ggplot() + geom_spatraster(data = prediction_future, aes(fill = mean)) + scale_fill_terrain_c()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"dealing-with-extrapolation","dir":"Articles","previous_headings":"","what":"Dealing with extrapolation","title":"tidysdm overview","text":"total area projection model may include environmental conditions lie outside range conditions covered calibration dataset. phenomenon can lead misinterpretation SDM outcomes due spatial extrapolation. tidysdm offers couple approaches deal problem. simplest one can clamp environmental variables stay within limits observed calibration set: predictions seem changed little. alternative allow values exceed ranges calibration set, compute Multivariate environmental similarity surfaces (MESS) (Elith et al. 
2010) highlight areas extrapolation occurs thus visualise prediction’s uncertainty. estimate MESS future time slice used : Extrapolation occurs areas MESS values negative, magnitude negative values indicating extreme extrapolation. plot, can see area extrapolation model already predicted suitability zero. explains clamping little predictions. can now overlay MESS values current prediction visualize areas characterized spatial extrapolation. Note clamping MESS useful making predictions future, also past present (latter case, allows us make sure background/pseudoabsences cover full range predictor variables area interest). tidymodels universe also includes functions estimate area applicability package waywiser, can used tidysdm.","code":"climate_future_clamped <- clamp_predictors(climate_future, training = lacerta_thin, .col= class) prediction_future_clamped <- predict_raster(lacerta_ensemble, raster = climate_future_clamped) ggplot() + geom_spatraster(data = prediction_future_clamped, aes(fill = mean)) + scale_fill_terrain_c() lacerta_mess_future <- extrapol_mess(x = climate_future, training = lacerta_thin, .col = \"class\") ggplot() + geom_spatraster(data = lacerta_mess_future) + scale_fill_viridis_b(na.value = \"transparent\") # subset mess lacerta_mess_future_subset <- lacerta_mess_future lacerta_mess_future_subset[lacerta_mess_future_subset >= 0] <- NA lacerta_mess_future_subset[lacerta_mess_future_subset < 0] <- 1 # convert into polygon lacerta_mess_future_subset <- as.polygons(lacerta_mess_future_subset) # plot as a mask ggplot() + geom_spatraster(data = prediction_future) + scale_fill_viridis_b(na.value = \"transparent\") + geom_sf(data = lacerta_mess_future_subset, fill= \"lightgray\", alpha = 0.5, linewidth = 0.5)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"visualising-the-contribution-of-individual-variables","dir":"Articles","previous_headings":"","what":"Visualising the contribution of individual variables","title":"tidysdm overview","text":"sometimes interest understand relative contribution individual variables prediction. complex task, especially interactions among variables. simpler linear models, possible obtain marginal response curves (show effect variable whilst keeping variables mean) using step_profile() recipes package. use step_profile() define new recipe can bake generate appropriate dataset make marginal prediction. can plot predictions values variable interest. example, investigate contribution bio05, : also possible use DALEX, explore tidysdm models; see details tidymodels additions article.","code":"bio05_prof <- lacerta_rec %>% step_profile(-bio05, profile = vars(bio05)) %>% prep(training = lacerta_thin) bio05_data <- bake(bio05_prof, new_data = NULL) bio05_data <- bio05_data %>% mutate( pred = predict(lacerta_ensemble, bio05_data)$mean ) ggplot(bio05_data, aes(x = bio05, y = pred)) + geom_point(alpha = .5, cex = 1)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a0_tidysdm_overview.html","id":"repeated-ensembles","dir":"Articles","previous_headings":"","what":"Repeated ensembles","title":"tidysdm overview","text":"steps thinning sampling pseudo-absences can bit impact performance SDMs. steps stochastic, good practice explore effect repeating , creating ensembles models repeats. tidysdm, possible create repeat_ensembles. start creating list simple_ensembles, looping SDM pipeline. just use two fast models speed process. 
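Before building the repeats, one more note on the marginal-response example above: the same step_profile() recipe works for any predictor; a sketch mirroring the bio05 chunk, here for bio06 (same objects and assumptions):

# sketch: marginal response curve for bio06, mirroring the bio05 example
bio06_prof <- lacerta_rec %>%
  step_profile(-bio06, profile = vars(bio06)) %>%
  prep(training = lacerta_thin)
bio06_data <- bake(bio06_prof, new_data = NULL)
bio06_data <- bio06_data %>%
  mutate(pred = predict(lacerta_ensemble, bio06_data)$mean)
ggplot(bio06_data, aes(x = bio06, y = pred)) +
  geom_point(alpha = .5, cex = 1)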
Now can create repeat_ensemble list: can summarise goodness fit models repeat collect_metrics(), autoplot() function repeated_ensemble objects. can predict usual way (take mean median models):","code":"# empty object to store the simple ensembles that we will create ensemble_list <- list() set.seed(123) # make sure you set the seed OUTSIDE the loop for (i_repeat in 1:3) { # thin the data lacerta_thin_rep <- thin_by_cell(lacerta, raster = climate_present) lacerta_thin_rep <- thin_by_dist(lacerta_thin_rep, dist_min = 20000) # sample pseudo-absences lacerta_thin_rep <- sample_pseudoabs(lacerta_thin_rep, n = 3 * nrow(lacerta_thin_rep), raster = climate_present, method = c(\"dist_min\", 50000) ) # get climate lacerta_thin_rep <- lacerta_thin_rep %>% bind_cols(terra::extract(climate_present, lacerta_thin_rep, ID = FALSE)) # create folds lacerta_thin_rep_cv <- spatial_block_cv(lacerta_thin_rep, v = 5) # create a recipe lacerta_thin_rep_rec <- recipe(lacerta_thin_rep, formula = class ~ .) # create a workflow_set lacerta_thin_rep_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_thin_rep_rec), models = list( # the standard glm specs glm = sdm_spec_glm(), # maxent specs with tuning maxent = sdm_spec_maxent() ), # make all combinations of preproc and models, cross = TRUE ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) # train the model lacerta_thin_rep_models <- lacerta_thin_rep_models %>% workflow_map(\"tune_grid\", resamples = lacerta_thin_rep_cv, grid = 10, metrics = sdm_metric_set(), verbose = TRUE ) # make an simple ensemble and add it to the list ensemble_list[[i_repeat]] <- simple_ensemble() %>% add_member(lacerta_thin_rep_models, metric = \"boyce_cont\") } #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> ✔ 1 of 2 resampling: default_glm (228ms) #> i 2 of 2 tuning: default_maxent #> ✔ 2 of 2 tuning: default_maxent (7.2s) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> ✔ 1 of 2 resampling: default_glm (225ms) #> i 2 of 2 tuning: default_maxent #> ✔ 2 of 2 tuning: default_maxent (6.9s) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> ✔ 1 of 2 resampling: default_glm (229ms) #> i 2 of 2 tuning: default_maxent #> ✔ 2 of 2 tuning: default_maxent (7.3s) lacerta_rep_ens <- repeat_ensemble() %>% add_repeat(ensemble_list) lacerta_rep_ens #> A repeat_ensemble of models #> #> Number of repeats: #> • 3 #> #> Members: #> • default_glm #> • default_maxent #> #> Available metrics: #> • boyce_cont #> • roc_auc #> • tss_max #> #> Metric used to tune workflows: #> • boyce_cont lacerta_rep_ens <- predict_raster(lacerta_rep_ens, climate_present, fun = c(\"mean\", \"median\") ) ggplot() + geom_spatraster(data = lacerta_rep_ens, aes(fill = median)) + scale_fill_terrain_c()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a1_palaeodata_application.html","id":"sdms-with-tidymodels-for-palaeo-data","dir":"Articles","previous_headings":"","what":"SDMs with tidymodels for palaeo data","title":"Application with palaeodata","text":"article, show Species Distribution Model can fitted tidysdm time-scattered (.e.palaeontological, archaeozoological, archaeological) data, samples covering different time periods. recommend users first read “tidysdm overview” article, introduces number functions concepts used present article. 
first load tidysdm:","code":"library(tidysdm) #> Loading required package: tidymodels #> ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ── #> ✔ broom 1.0.7 ✔ recipes 1.1.0 #> ✔ dials 1.3.0 ✔ rsample 1.2.1 #> ✔ dplyr 1.1.4 ✔ tibble 3.2.1 #> ✔ ggplot2 3.5.1 ✔ tidyr 1.3.1 #> ✔ infer 1.0.7 ✔ tune 1.2.1 #> ✔ modeldata 1.4.0 ✔ workflows 1.1.4 #> ✔ parsnip 1.2.1 ✔ workflowsets 1.1.0 #> ✔ purrr 1.0.2 ✔ yardstick 1.3.1 #> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ── #> ✖ purrr::discard() masks scales::discard() #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() #> ✖ recipes::step() masks stats::step() #> • Use tidymodels_prefer() to resolve common conflicts. #> Loading required package: spatialsample"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a1_palaeodata_application.html","id":"preparing-your-data","dir":"Articles","previous_headings":"","what":"Preparing your data","title":"Application with palaeodata","text":"start loading set radiocarbon dates (calibrated) horses, covering 22k years ago 8k years ago. convert dataset sf data.frame can easily plot (tidyterra shines): background presences, use land mask present, taken pastclim, cut cover Europe: use tidyterra plot: now thin presences, locations 100km 2000 years apart. see left: now need time series palaeoclimate reconstructions. vignette, use example dataset pastclim. dataset reconstructions every 5k years past 20k years 1 degree resolution, 3 bioclimatic variables. suffice illustrative purposes, recommend download higher quality datasets pastclim real analysis. land mask, cut reconstructions cover Europe : Now thin observations keep one per cell raster (better equal area projection…), remove locations outside desired area (): Let’s see left points: Now sample pseudo-absences (constraint least 70km away presences), selecting three times number presences Let’s see presences absences: Now let’s get climate location. pastclim requires data frame two columns coordinates column time years present (negative values represent time past). 
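The time-aware steps above lean on pastclim's date helpers; a minimal sketch of what they do:

# sketch: pastclim's conversions between years BP and dates
library(pastclim)
d <- ybp2date(-14000)  # negative years BP -> a date object used internally
date2ybp(d)            # converts back to -14000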
We manipulate the sf object accordingly:","code":"data(horses) horses #> # A tibble: 788 × 3 #> latitude longitude time_bp #> #> 1 43.2 -2.04 -14000 #> 2 43.2 -2.04 -14000 #> 3 43.2 -2.04 -14000 #> 4 43.2 -2.04 -14000 #> 5 43.2 -2.04 -16000 #> 6 43.3 -1.89 -16000 #> 7 43.2 -2.2 -14000 #> 8 43.2 -2.2 -19000 #> 9 43.2 -2.2 -20000 #> 10 43.2 -2.2 -21000 #> # ℹ 778 more rows library(sf) #> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE horses <- st_as_sf(horses, coords = c(\"longitude\", \"latitude\")) st_crs(horses) <- 4326 #> Loading required package: terra #> terra 1.7.83 #> #> Attaching package: 'terra' #> The following object is masked from 'package:tidyr': #> #> extract #> The following object is masked from 'package:scales': #> #> rescale library(pastclim) land_mask <- pastclim::get_land_mask(time_bp = 0, dataset = \"Example\") europe_poly <- vect(region_outline$Europe) crs(europe_poly) <- \"lonlat\" land_mask <- crop(land_mask, europe_poly) land_mask <- mask(land_mask, europe_poly) library(tidyterra) #> #> Attaching package: 'tidyterra' #> The following object is masked from 'package:stats': #> #> filter ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_0)) + geom_sf(data = horses, aes(col = time_bp)) set.seed(123) horses <- thin_by_dist_time(horses, dist_min = km2m(100), interval_min = y2d(2000), time_col = \"time_bp\", lubridate_fun = pastclim::ybp2date ) nrow(horses) #> [1] 185 ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_0)) + geom_sf(data = horses, aes(col = time_bp)) library(pastclim) climate_vars <- c(\"bio01\", \"bio10\", \"bio12\") climate_full <- pastclim::region_series( bio_variables = climate_vars, data = \"Example\", crop = region_outline$Europe ) set.seed(123) horses <- thin_by_cell_time(horses, raster = climate_full, time_col = \"time_bp\", lubridate_fun = pastclim::ybp2date ) nrow(horses) #> [1] 138 ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_0)) + geom_sf(data = horses, aes(col = time_bp)) set.seed(123) horses <- sample_pseudoabs_time(horses, n_per_presence = 3, raster = climate_full, time_col = \"time_bp\", lubridate_fun = pastclim::ybp2date, method = c(\"dist_min\", km2m(70)) ) ggplot() + geom_spatraster(data = land_mask, aes(fill = land_mask_0)) + geom_sf(data = horses, aes(col = class)) horses_df <- horses %>% dplyr::bind_cols(sf::st_coordinates(horses)) %>% mutate(time_bp = date2ybp(time_step)) %>% as.data.frame() %>% select(-geometry) # get climate horses_df <- location_slice_from_region_series(horses_df, region_series = climate_full ) # add the climate reconstructions to the sf object, and remove the time_step # as we don't need it for modelling horses <- horses %>% bind_cols(horses_df[, climate_vars]) %>% select(-time_step)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a1_palaeodata_application.html","id":"fit-the-model-by-crossvalidation","dir":"Articles","previous_headings":"","what":"Fit the model by crossvalidation","title":"Application with palaeodata","text":"Next, we need to set up a recipe to define how to handle our dataset. We don't want to transform our data, so we just need to define the formula (class is the outcome, all other variables are predictors; note that, for sf objects, geometry is automatically ignored as a predictor): We can quickly check that we have the variables that we want: We now build a workflow_set of different models, defining which hyperparameters we want to tune. We will use glm, gam, random forest and boosted trees as our models, so only random forest and boosted trees have tunable hyperparameters. 
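Before setting up the models, it is worth verifying that the climate extraction above did not produce missing values. A minimal sketch, not in the original article, assuming horses and climate_vars as created above:
# count NAs per climate variable in the extracted data
horses %>%
  sf::st_drop_geometry() %>%
  summarise(across(all_of(climate_vars), ~ sum(is.na(.x))))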
For the most commonly used models, tidysdm automatically chooses the most important parameters, but it is possible to fully customise the model specifications. Note that gams are unusual, in that we need to specify a formula to define the variables for which we will fit smooths. By default, gam_formula() fits a smooth for every continuous predictor, but a custom formula can be provided instead. We now want to set up a spatial block cross-validation scheme to tune and assess our models: We can now use the block CV folds to tune and assess the models: Note that workflow_set correctly detects that there are no tuning parameters for glm and gam. We can have a look at the performance of our models with: Now let's create an ensemble, selecting the best set of parameters for each model (this is really only relevant for the random forest and boosted trees, as there are no hyper-parameters to tune for glm and gam). We will use the Boyce continuous index as our metric to choose the best random forest and boosted tree. When adding members to an ensemble, they are automatically fitted to the full training dataset, and so are ready to make predictions. And we visualise it: ","code":"horses_rec <- recipe(horses, formula = class ~ .) horses_rec #> #> ── Recipe ────────────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 3 #> coords: 2 horses_rec$var_info #> # A tibble: 6 × 4 #> variable type role source #> #> 1 bio01 predictor original #> 2 bio10 predictor original #> 3 bio12 predictor original #> 4 X coords original #> 5 Y coords original #> 6 class outcome original horses_models <- # create the workflow_set workflow_set( preproc = list(default = horses_rec), models = list( # the standard glm specs (no params to tune) glm = sdm_spec_glm(), # the standard gam specs (no params to tune) gam = sdm_spec_gam(), # rf specs with tuning rf = sdm_spec_rf(), # boosted tree model (gbm) specs with tuning gbm = sdm_spec_boost_tree() ), # make all combinations of preproc and models, cross = TRUE ) %>% # set formula for gams update_workflow_model(\"default_gam\", spec = sdm_spec_gam(), formula = gam_formula(horses_rec) ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) library(tidysdm) set.seed(1005) horses_cv <- spatial_block_cv(horses, v = 5) autoplot(horses_cv) set.seed(123) horses_models <- horses_models %>% workflow_map(\"tune_grid\", resamples = horses_cv, grid = 5, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 4 resampling: default_glm #> ✔ 1 of 4 resampling: default_glm (261ms) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 2 of 4 resampling: default_gam #> ✔ 2 of 4 resampling: default_gam (652ms) #> i 3 of 4 tuning: default_rf #> i Creating pre-processing data to finalize unknown parameter: mtry #> ✔ 3 of 4 tuning: default_rf (2.4s) #> i 4 of 4 tuning: default_gbm #> i Creating pre-processing data to finalize unknown parameter: mtry #> ✔ 4 of 4 tuning: default_gbm (15.2s) autoplot(horses_models) horses_ensemble <- simple_ensemble() %>% add_member(horses_models, metric = \"boyce_cont\") autoplot(horses_ensemble)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a1_palaeodata_application.html","id":"projecting-to-other-times","dir":"Articles","previous_headings":"","what":"Projecting to other times","title":"Application with palaeodata","text":"We can now make predictions with this ensemble (using the default option of taking the mean of the predictions from each model) for the Last Glacial Maximum (LGM, 21,000 years ago). 
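Since the example dataset only contains reconstructions every 5k years, it can help to check which time steps are available before choosing a slice. A hedged sketch, not in the original article; we assume get_time_bp_steps() is the appropriate pastclim helper, so treat the call as an assumption:
# list the time steps (in years BP) available in the example dataset
pastclim::get_time_bp_steps(dataset = "Example")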
We predict using the ensemble:","code":"climate_lgm <- pastclim::region_slice( time_bp = -20000, bio_variables = climate_vars, data = \"Example\", crop = region_outline$Europe ) prediction_lgm <- predict_raster(horses_ensemble, climate_lgm) ggplot() + geom_spatraster(data = prediction_lgm, aes(fill = mean)) + scale_fill_terrain_c()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a2_tidymodels_additions.html","id":"exploring-models-with-dalex","dir":"Articles","previous_headings":"","what":"Exploring models with DALEX","title":"Examples of additional tidymodels features","text":"An issue with machine learning algorithms is that it is not easy to understand the role of the different variables in producing the final prediction. A number of packages have been created to explore and explain the behaviour of ML algorithms, and they can be used with tidysdm. In the tidysdm overview article, we illustrated how to use recipes to create profiles. Here we demonstrate how to use DALEX, an excellent package that has methods to deal with tidymodels. tidysdm contains additional functions that allow the use of DALEX functions directly on tidysdm ensembles. We will use the simple ensemble that we built in the overview vignette. The first step in DALEX is to create an explainer object, which can then be queried by different functions in the package to turn the explainer into an explanation (following the DALEX lingo). As a first step, we use the custom function explain_tidysdm to generate our explainer: Now that we have our explainer, we can explore variable importance for the ensemble: Or generate partial dependency plots for a given variable (e.g. bio05): There are many other functions in DALEX that can be applied to the explainer to explore the behaviour of the model; see the several tutorials at https://modeloriented.github.io/DALEX/ It is also possible to explore the individual models that make up the ensemble: The resulting list can then be used to build lists of explanations, which can then be plotted.","code":"library(tidysdm) #> Loading required package: tidymodels #> ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ── #> ✔ broom 1.0.7 ✔ recipes 1.1.0 #> ✔ dials 1.3.0 ✔ rsample 1.2.1 #> ✔ dplyr 1.1.4 ✔ tibble 3.2.1 #> ✔ ggplot2 3.5.1 ✔ tidyr 1.3.1 #> ✔ infer 1.0.7 ✔ tune 1.2.1 #> ✔ modeldata 1.4.0 ✔ workflows 1.1.4 #> ✔ parsnip 1.2.1 ✔ workflowsets 1.1.0 #> ✔ purrr 1.0.2 ✔ yardstick 1.3.1 #> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ── #> ✖ purrr::discard() masks scales::discard() #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() #> ✖ recipes::step() masks stats::step() #> • Use tidymodels_prefer() to resolve common conflicts. #> Loading required package: spatialsample lacerta_ensemble #> A simple_ensemble of models #> #> Members: #> • default_glm #> • default_rf #> • default_gbm #> • default_maxent #> #> Available metrics: #> • boyce_cont #> • roc_auc #> • tss_max #> #> Metric used to tune workflows: #> • boyce_cont explainer_lacerta_ens <- explain_tidysdm(lacerta_ensemble) #> Preparation of a new explainer is initiated #> -> model label : data.frame ( default ) #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : predict_function #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidysdm , ver. 0.9.6.9002 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.01490969 , mean = 0.2861937 , max = 0.7169324 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.6465921 , mean = -0.03619367 , max = 0.7891973 #> A new explainer has been created! library(DALEX) #> Welcome to DALEX (version: 2.4.3). 
#> Find examples and detailed introduction at: http://ema.drwhy.ai/ #> Additional features will be available after installation of: ggpubr. #> Use 'install_dependencies()' to get all suggested dependencies #> #> Attaching package: 'DALEX' #> The following object is masked from 'package:dplyr': #> #> explain vip_ensemble <- model_parts(explainer = explainer_lacerta_ens) plot(vip_ensemble) pdp_bio05 <- model_profile(explainer_lacerta_ens, N = 500, variables = \"bio05\") plot(pdp_bio05) explainer_list <- explain_tidysdm(tidysdm::lacerta_ensemble, by_workflow = TRUE) #> Preparation of a new explainer is initiated #> -> model label : default_glm #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.2280177 , mean = 0.75 , max = 0.9854359 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.9096205 , mean = 5.395921e-12 , max = 0.7719823 #> A new explainer has been created! #> Preparation of a new explainer is initiated #> -> model label : default_rf #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.1315421 , mean = 0.7480648 , max = 1 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.6878921 , mean = 0.001935171 , max = 0.5870619 #> A new explainer has been created! #> Preparation of a new explainer is initiated #> -> model label : default_gbm #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.3390188 , mean = 0.7314788 , max = 0.9632964 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.9268645 , mean = 0.01852121 , max = 0.6280424 #> A new explainer has been created! #> Preparation of a new explainer is initiated #> -> model label : default_maxent #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.1095764 , mean = 0.6256817 , max = 0.9960248 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.8207859 , mean = 0.1243183 , max = 0.8904236 #> A new explainer has been created! 
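# an illustrative aside, not in the original code: each element of
# explainer_list is a regular DALEX explainer, so DALEX functions can also be
# applied to a single member of the ensemble, e.g. variable importance for
# just the glm workflow
vip_glm <- model_parts(explainer = explainer_list[[1]])
plot(vip_glm)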
profile_list <- lapply(explainer_list, model_profile, N = 500, variables = \"bio05\" ) plot(profile_list)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a2_tidymodels_additions.html","id":"the-initial-split","dir":"Articles","previous_headings":"","what":"The initial split","title":"Examples of additional tidymodels features","text":"The standard approach in tidymodels is to make an initial split of the data into a test and a training set. We will retain 20% of the data (1/5) for the testing set, and use the rest for training. We start by loading a set of presences and absences and their associated climate, analogous to the one generated in the tidysdm overview article: We then use spatial_initial_split to do the split, using a spatial_block_cv scheme to partition the data: And check the balance of presences vs pseudoabsences: We can now extract the training set from our lacerta_initial split, and sample folds to set up cross-validation (note that we set the cellsize and offset based on the full dataset, lacerta_thin; this allows us to use the same grid we used for the initial_split). And check the balance of the dataset:","code":"library(tidysdm) library(sf) #> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) set.seed(1005) lacerta_initial <- spatial_initial_split(lacerta_thin, prop = 1 / 5, spatial_block_cv ) autoplot(lacerta_initial) check_splits_balance(lacerta_initial, class) #> # A tibble: 1 × 4 #> presence_test pseudoabs_test presence_train pseudoabs_train #> #> 1 88 267 25 72 set.seed(1005) lacerta_training <- training(lacerta_initial) lacerta_cv <- spatial_block_cv(lacerta_training, v = 5, cellsize = grid_cellsize(lacerta_thin), offset = grid_offset(lacerta_thin) ) autoplot(lacerta_cv) check_splits_balance(lacerta_cv, class) #> # A tibble: 5 × 4 #> presence_assessment pseudoabs_assessment presence_analysis pseudoabs_analysis #> #> 1 74 197 14 70 #> 2 59 225 29 42 #> 3 73 220 15 47 #> 4 76 209 12 58 #> 5 70 218 18 49"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a2_tidymodels_additions.html","id":"different-recipes-for-certain-models","dir":"Articles","previous_headings":"","what":"Different recipes for certain models","title":"Examples of additional tidymodels features","text":"Certain types of models (e.g. glm, svm) struggle with correlated variables; other algorithms, such as random forests, can handle correlated variables. So, we create two recipes, one with all the variables, and one with only the uncorrelated variables: And we now use the two recipes in a workflowset (we keep it small to reduce computational time), selecting the appropriate recipe for each model. We include a model (polynomial support vector machines, or SVM) for which there is no wrapper in tidysdm for creating the model specification. However, we can use a standard model spec from parsnip: We can now use the block CV folds to tune and assess the models. Note that there are multiple tuning approaches, besides the standard grid method. Here we will use tune_bayes from the tune package (see its help page to see how a Gaussian Process model is used to choose parameter combinations). This tuning method (as opposed to the use of a standard grid) does allow hyper-parameters with unknown limits, but mtry for random forest is undefined, as its upper range depends on the number of variables in the dataset. So, before tuning, we need to finalise mtry by informing the set of dials with the actual data: And now we can tune the models: We can have a look at the performance of our models with:","code":"lacerta_rec_all <- recipe(lacerta_thin, formula = class ~ .) 
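# an illustrative aside, not in the original code: a candidate set of
# uncorrelated variables can also be derived programmatically with
# tidysdm::filter_collinear(); the arguments here are a hedged sketch
vars_uncor <- filter_collinear(
  sf::st_drop_geometry(lacerta_thin) %>% select(-class),
  cutoff = 0.7, method = "cor_caret"
)
vars_uncor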
lacerta_rec_uncor <- lacerta_rec_all %>% step_rm(all_of(c( \"bio01\", \"bio02\", \"bio03\", \"bio04\", \"bio07\", \"bio08\", \"bio09\", \"bio10\", \"bio11\", \"bio12\", \"bio14\", \"bio16\", \"bio17\", \"bio18\", \"bio19\", \"altitude\" ))) lacerta_rec_uncor #> #> ── Recipe ────────────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 20 #> coords: 2 #> #> ── Operations #> • Variables removed: all_of(c(\"bio01\", \"bio02\", \"bio03\", \"bio04\", \"bio07\", #> \"bio08\", \"bio09\", \"bio10\", \"bio11\", \"bio12\", \"bio14\", \"bio16\", \"bio17\", #> \"bio18\", \"bio19\", \"altitude\")) lacerta_models <- # create the workflow_set workflow_set( preproc = list( uncor = lacerta_rec_uncor, # recipe for the glm all = lacerta_rec_all, # recipe for the random forest all = lacerta_rec_uncor # recipe for svm ), models = list( # the standard glm specs glm = sdm_spec_glm(), # rf specs with tuning rf = sdm_spec_rf(), # svm specs with tuning svm = parsnip::svm_poly( cost = tune(), degree = tune() ) %>% parsnip::set_engine(\"kernlab\") %>% parsnip::set_mode(\"classification\") ), # make all combinations of preproc and models, cross = FALSE ) %>% # tweak controls to store information needed later to create the ensemble # note that we use the bayes version as we will use a Bayes search (see later) option_add(control = stacks::control_stack_bayes()) rf_param <- lacerta_models %>% # extract the rf workflow extract_workflow(\"all_rf\") %>% # extract its parameter dials (used to tune) extract_parameter_set_dials() %>% # give it the predictors to finalize mtry finalize(x = st_drop_geometry(lacerta_thin) %>% select(-class)) # now update the workflowset with the new parameter info lacerta_models <- lacerta_models %>% option_add(param_info = rf_param, id = \"all_rf\") set.seed(1234567) lacerta_models <- lacerta_models %>% workflow_map(\"tune_bayes\", resamples = lacerta_cv, initial = 8, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 3 resampling: uncor_glm #> ✔ 1 of 3 resampling: uncor_glm (317ms) #> i 2 of 3 tuning: all_rf #> ! No improvement for 10 iterations; returning current results. #> ✔ 2 of 3 tuning: all_rf (16.1s) #> i 3 of 3 tuning: all_svm #> ✔ 3 of 3 tuning: all_svm (20.7s) autoplot(lacerta_models)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a2_tidymodels_additions.html","id":"stack-ensembles","dir":"Articles","previous_headings":"","what":"Stack ensembles","title":"Examples of additional tidymodels features","text":"Instead of building a simple ensemble with the best version of each model type, we can build a stack ensemble, as implemented in the package stacks. Stacking uses a meta-learning algorithm to learn how to best combine multiple models, including multiple versions of the same algorithm with different hyper-parameters. We can see that three versions of the SVM and one of the random forests were selected; the stacking coefficients give an indication of the weight each model carries within the ensemble. We can now use the ensemble to make predictions on the testing data: And look at the goodness of fit using the commonly used sdm metrics. Note that sdm_metric_set is first invoked to generate a function (with an empty ()) that is then used on the data. We can now make predictions with the stacked ensemble. 
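To see which hyper-parameters the Bayesian search settled on, the tuning results can be queried directly. A minimal sketch (not in the original article), using tune::select_best() on the random forest results stored in the workflow set above:
lacerta_models %>%
  extract_workflow_set_result("all_rf") %>%
  select_best(metric = "boyce_cont")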
We start by extracting the climate variables of interest:","code":"library(stacks) set.seed(1005) lacerta_stack <- # initialize the stack stacks() %>% # add candidate members add_candidates(lacerta_models) %>% # determine how to combine their predictions blend_predictions() %>% # fit the candidates with non-zero weights (i.e. non-zero stacking coefficients) fit_members() autoplot(lacerta_stack, type = \"weights\") lacerta_testing <- testing(lacerta_initial) lacerta_test_pred <- lacerta_testing %>% bind_cols(predict(lacerta_stack, ., type = \"prob\")) sdm_metric_set()(data = lacerta_test_pred, truth = class, .pred_presence) #> # A tibble: 3 × 3 #> .metric .estimator .estimate #> #> 1 boyce_cont binary 0.853 #> 2 roc_auc binary 0.986 #> 3 tss_max binary 0.92 download_dataset(\"WorldClim_2.1_10m\") climate_vars <- lacerta_rec_all$var_info %>% filter(role == \"predictor\") %>% pull(variable) climate_present <- pastclim::region_slice( time_ce = 1985, bio_variables = climate_vars, data = \"WorldClim_2.1_10m\", crop = iberia_poly ) prediction_present <- predict_raster(lacerta_stack, climate_present, type = \"prob\" ) library(tidyterra) #> #> Attaching package: 'tidyterra' #> The following object is masked from 'package:stats': #> #> filter ggplot() + geom_spatraster(data = prediction_present, aes(fill = .pred_presence)) + scale_fill_terrain_c() + # plot presences used in the model geom_sf(data = lacerta_thin %>% filter(class == \"presence\"))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a2_tidymodels_additions.html","id":"using-multi-level-factors-as-predictors","dir":"Articles","previous_headings":"","what":"Using multi-level factors as predictors","title":"Examples of additional tidymodels features","text":"Not all machine learning algorithms can natively use multilevel factors as predictors. The solution is to create dummy variables, binary variables that represent the levels of the factor. In tidymodels, this is done using the step_dummy function. Let's create a factor variable with 3 levels based on altitude. We then create a recipe adding a step to create dummy variables for the topography variable. Let us see what it does: We have added two “derived” variables, topography_hills and topography_mountains, binary variables that allow us to code topography (plains is used as the reference level, coded as both hills and mountains being 0 for a given location). We can look at the first few rows of the data to see the new variables by baking the recipe: We can now run the sdm as usual: We can now verify that the dummy variables were used by extracting the model fit of one of the models in the ensemble: Note the coefficients for topography_hills and topography_mountains. Let us now predict the presence of the lizard in the Iberian Peninsula using this ensemble. Note that, for predict_raster() to work, the names of the levels of the categorical variable need to match those used when training the models (i.e. in the recipe with step_dummy()):","code":"library(tidysdm) # load the dataset lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) # create a topography variable with 3 levels based on altitude lacerta_thin$topography <- cut(lacerta_thin$altitude, breaks = c(-Inf, 200, 800, Inf), labels = c(\"plains\", \"hills\", \"mountains\")) table(lacerta_thin$topography) #> #> plains hills mountains #> 82 233 137 # subset to variables of interest lacerta_thin <- lacerta_thin %>% select(class, bio05, bio06, bio12, bio15, topography) lacerta_rec <- recipe(lacerta_thin, formula = class ~ .) 
%>% step_dummy(topography) lacerta_rec #> #> ── Recipe ────────────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 5 #> coords: 2 #> #> ── Operations #> • Dummy variables from: topography lacerta_prep <- prep(lacerta_rec) summary(lacerta_prep) #> # A tibble: 9 × 4 #> variable type role source #> #> 1 bio05 predictor original #> 2 bio06 predictor original #> 3 bio12 predictor original #> 4 bio15 predictor original #> 5 X coords original #> 6 Y coords original #> 7 class outcome original #> 8 topography_hills predictor derived #> 9 topography_mountains predictor derived lacerta_bake <- bake(lacerta_prep, new_data = lacerta_thin) glimpse(lacerta_bake) #> Rows: 452 #> Columns: 9 #> $ bio05 30.50350, 25.28050, 23.67800, 29.68875, 26.34075,… #> $ bio06 1.477000, 3.631750, 0.789500, 6.048750, 1.869000,… #> $ bio12 596, 1490, 1395, 729, 1324, 1409, 1260, 1390, 116… #> $ bio15 50.59533, 50.07437, 47.24211, 58.88199, 51.62960,… #> $ X -5.394226, -8.374844, -7.886102, -8.231414, -7.17… #> $ Y 39.48495, 41.97207, 41.89992, 39.49710, 41.78401,… #> $ class presence, presence, presence, presence, presence,… #> $ topography_hills 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0… #> $ topography_mountains 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1… # define the models lacerta_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_rec), models = list( # the standard glm specs glm = sdm_spec_glm(), # rf specs with tuning rf = sdm_spec_rf() ), # make all combinations of preproc and models, cross = TRUE ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) # tune set.seed(100) lacerta_cv <- spatial_block_cv(lacerta_thin, v = 3) lacerta_models <- lacerta_models %>% workflow_map(\"tune_grid\", resamples = lacerta_cv, grid = 3, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> ✔ 1 of 2 resampling: default_glm (196ms) #> i 2 of 2 tuning: default_rf #> i Creating pre-processing data to finalize unknown parameter: mtry #> ✔ 2 of 2 tuning: default_rf (963ms) # fit the ensemble lacerta_ensemble <- simple_ensemble() %>% add_member(lacerta_models, metric = \"boyce_cont\") lacerta_ensemble$workflow[[1]] %>% extract_fit_parsnip() #> parsnip model object #> #> #> Call: stats::glm(formula = ..y ~ ., family = stats::binomial, data = data) #> #> Coefficients: #> (Intercept) bio05 bio06 #> -6.920024 0.635493 -0.334427 #> bio12 bio15 topography_hills #> -0.002505 -0.113106 -1.851098 #> topography_mountains #> -2.531259 #> #> Degrees of Freedom: 451 Total (i.e. 
Null); 445 Residual #> Null Deviance: 508.4 #> Residual Deviance: 189.5 AIC: 203.5 climate_present <- terra::readRDS(system.file(\"extdata/lacerta_climate_present_10m.rds\", package = \"tidysdm\")) # first we add a topography variable to the climate data climate_present$topography <- climate_present$altitude climate_present$topography <- terra::classify(climate_present$topography, rcl = c(-Inf, 200, 800, Inf), include.lowest=TRUE, brackets=TRUE) library(terra) #> terra 1.7.83 #> #> Attaching package: 'terra' #> The following objects are masked from 'package:kernlab': #> #> buffer, size #> The following object is masked from 'package:tidyr': #> #> extract #> The following object is masked from 'package:scales': #> #> rescale levels(climate_present$topography) <- data.frame(ID = c(0,1,2), topography = c(\"plains\", \"hills\", \"mountains\")) # now we can predict predict_factor <- predict_raster(lacerta_ensemble, climate_present) plot(predict_factor)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a3_troubleshooting.html","id":"nas-in-the-data","dir":"Articles","previous_headings":"","what":"NAs in the data","title":"Troubleshooting models that fail","text":"Not all algorithms allow NAs. We can generate a problematic dataset by loading the Lacerta dataset, and manually adding an NA: Let us set up a recipe and fit a workflow_set: As we can see, the error is self-explanatory. Also, note that the error impacts all data splits (technically, the rset objects): the error is repeated 15 times (5 splits by 3 hyperparameter values). Prepping the recipe (which trains it on the dataset) can help in diagnosing problems: Note that, in the training information, we were warned that 1 row was incomplete. We could use step_naomit to deal with this programmatically, or ascertain why we are generating missing data (we prefer the latter, as a good SDM pipeline should not generate observations, either presences or pseudoabsences, with missing data).","code":"library(tidysdm) #> Loading required package: tidymodels #> ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ── #> ✔ broom 1.0.7 ✔ recipes 1.1.0 #> ✔ dials 1.3.0 ✔ rsample 1.2.1 #> ✔ dplyr 1.1.4 ✔ tibble 3.2.1 #> ✔ ggplot2 3.5.1 ✔ tidyr 1.3.1 #> ✔ infer 1.0.7 ✔ tune 1.2.1 #> ✔ modeldata 1.4.0 ✔ workflows 1.1.4 #> ✔ parsnip 1.2.1 ✔ workflowsets 1.1.0 #> ✔ purrr 1.0.2 ✔ yardstick 1.3.1 #> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ── #> ✖ purrr::discard() masks scales::discard() #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() #> ✖ recipes::step() masks stats::step() #> • Use tidymodels_prefer() to resolve common conflicts. #> Loading required package: spatialsample lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) lacerta_thin$bio05[37] <- NA lacerta_rec <- recipe(lacerta_thin, formula = class ~ .) 
%>% step_rm(all_of(c( \"bio01\", \"bio02\", \"bio03\", \"bio04\", \"bio07\", \"bio08\", \"bio09\", \"bio10\", \"bio11\", \"bio12\", \"bio14\", \"bio16\", \"bio17\", \"bio18\", \"bio19\", \"altitude\" ))) lacerta_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_rec), models = list( # the standard glm specs glm = sdm_spec_glm(), # rf specs with tuning rf = sdm_spec_rf() ), # make all combinations of preproc and models, cross = TRUE ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) set.seed(100) lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5) lacerta_models <- lacerta_models %>% workflow_map(\"tune_grid\", resamples = lacerta_cv, grid = 3, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> ✔ 1 of 2 resampling: default_glm (329ms) #> i 2 of 2 tuning: default_rf #> i Creating pre-processing data to finalize unknown parameter: mtry #> → A | error: Missing data in columns: bio05. #> There were issues with some computations A: x1 #> There were issues with some computations A: x15 #> #> Warning: All models failed. Run `show_notes(.Last.tune.result)` for more #> information. #> Warning: Unknown or uninitialised column: `.notes`. #> ✖ 2 of 2 tuning: default_rf failed with lacerta_prep <- lacerta_rec %>% prep(lacerta_thin) lacerta_prep #> #> ── Recipe ────────────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 20 #> coords: 2 #> #> ── Training information #> Training data contained 452 data points and 1 incomplete row. #> #> ── Operations #> • Variables removed: bio01, bio02, bio03, bio04, bio07, bio08, ... | Trained"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a3_troubleshooting.html","id":"recipes-and-the-response-variable","dir":"Articles","previous_headings":"","what":"Recipes and the response variable","title":"Troubleshooting models that fail","text":"The response variable is treated in a special way in recipes, and this can lead to problems. It is best not to manipulate (e.g. transform from character to factor) the response variable in the recipe, since the response variable is only available when we train and test models, but not when we make projections. If a hard-coded step in the recipe includes the response variable, the model will fit, but it will then fail when you start making predictions. Another potential mistake is to remove the response variable when selecting the variables of interest. This can happen if we use step_select to choose the variables of interest, and the resulting error is less than clear: Let's load the data and create a recipe with step_select: Now we create the workflow set and fit it: The errors are not very intuitive. However, all models failed for all algorithms, which suggests that the problem lies on the data preparation side (either the data themselves, or the recipe). Ideally, you will have already had a look at the data (with summary or glimpse). So, in this case, we know that the data are fine. Whilst prepping (and sometimes baking) the recipe is generally informative for problems with the predictor variables, it is hard to diagnose problems with the outcome variable in a recipe. Prepping does not show anything obvious: In this case, it is a process of exclusion. Everything seems fine, but the models don't work. Then ask yourself whether the outcome variable might be problematic. As a general rule, we have found it easier to rely on step_rm to remove variables (e.g. correlated variables).","code":"lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) suggested_vars <- c(\"bio05\", \"bio06\", \"bio13\", \"bio14\", \"bio15\") lacerta_rec_sel <- recipe(lacerta_thin, formula = class ~ .) 
%>% step_select(all_of(suggested_vars)) lacerta_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_rec_sel), models = list( # the standard glm specs glm = sdm_spec_glm(), # rf specs with tuning rf = sdm_spec_rf() ), # make all combinations of preproc and models, cross = TRUE ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) set.seed(100) lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5) lacerta_models <- lacerta_models %>% workflow_map(\"tune_grid\", resamples = lacerta_cv, grid = 3, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> → A | error: ! `logistic_reg()` was unable to find an outcome. #> ℹ Ensure that you have specified an outcome column and that it hasn't been #> removed in pre-processing. #> Warning: All models failed. Run `show_notes(.Last.tune.result)` for more #> information. #> Warning: Unknown or uninitialised column: `.notes`. #> ✖ 1 of 2 resampling: default_glm failed with #> i 2 of 2 tuning: default_rf #> i Creating pre-processing data to finalize unknown parameter: mtry #> → A | error: ! `rand_forest()` was unable to find an outcome. #> ℹ Ensure that you have specified an outcome column and that it hasn't been #> removed in pre-processing. #> There were issues with some computations A: x1 #> There were issues with some computations A: x15 #> #> Warning: All models failed. Run `show_notes(.Last.tune.result)` for more information. #> Unknown or uninitialised column: `.notes`. #> ✖ 2 of 2 tuning: default_rf failed with lacerta_prep_sel <- lacerta_rec_sel %>% prep(lacerta_thin) lacerta_prep_sel #> #> ── Recipe ────────────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 20 #> coords: 2 #> #> ── Training information #> Training data contained 452 data points and no incomplete rows. #> #> ── Operations #> • Variables selected: bio05, bio06, bio13, bio14, bio15 | Trained"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a3_troubleshooting.html","id":"using-the-desired-formula-with-gam","dir":"Articles","previous_headings":"","what":"Using the desired formula with GAM","title":"Troubleshooting models that fail","text":"Generalised Additive Models have an unusual syntax, as the user has to define which variables are fitted with splines. tidysdm has functions to simplify this process, assuming that the user just wants to fit a standard smooth to every continuous predictor. Note that the step of defining a formula is incompatible with using step_cor in a recipe. step_cor removes correlated variables in recipes, using a similar algorithm to filter_collinear with method cor_caret. However, the algorithm is fitted to the data in each split when cross-validating. This means that different variables may eventually be presented to the model when it is fitted to each split, leading to an error about a mismatch between the formula and the available variables. This is a known issue with how GAMs are implemented in tidymodels.","code":"lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) lacerta_rec <- recipe(lacerta_thin, formula = class ~ .) 
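# a hedged sketch of the safer alternative discussed above (not in the
# original code): derive the drop-list from the keep-list and use step_rm(),
# so the outcome is never touched
vars_to_drop <- setdiff(
  names(sf::st_drop_geometry(lacerta_thin)),
  c(suggested_vars, "class")
)
lacerta_rec_rm <- recipe(lacerta_thin, formula = class ~ .) %>%
  step_rm(all_of(vars_to_drop))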
%>% step_rm(all_of(c( \"bio01\", \"bio02\", \"bio03\", \"bio04\", \"bio07\", \"bio08\", \"bio09\", \"bio10\", \"bio11\", \"bio12\", \"bio14\", \"bio16\", \"bio17\", \"bio18\", \"bio19\", \"altitude\" ))) lacerta_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_rec), models = list( # the standard glm specs glm = sdm_spec_glm(), # the standard gam specs gam = sdm_spec_gam() ), # make all combinations of preproc and models, cross = TRUE ) %>% # set formula for gams update_workflow_model(\"default_gam\", spec = sdm_spec_gam(), formula = gam_formula(lacerta_rec) ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) set.seed(100) lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5) lacerta_models <- lacerta_models %>% workflow_map(\"tune_grid\", resamples = lacerta_cv, grid = 3, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 2 resampling: default_glm #> ✔ 1 of 2 resampling: default_glm (270ms) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 2 of 2 resampling: default_gam #> ✔ 2 of 2 resampling: default_gam (1.4s)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/articles/a3_troubleshooting.html","id":"when-only-some-splits-fail","dir":"Articles","previous_headings":"","what":"When only some splits fail","title":"Troubleshooting models that fail","text":"In the examples above, all splits used for cross-validation for a given algorithm failed. However, it is also possible that failures occur only in some splits for certain algorithms (technically, in specific rsplit objects within certain workflows). When this type of problem occurs, it is best to extract the problematic workflow, and potentially investigate fitting it to the specific rsplit. We generate a problematic dataset by subsampling the lacerta dataset: We then create 3 folds and attempt to fit the models: We can see that one of the folds gives us an error when using GAMs. The error (“Fitting terminated with step failure - check results carefully”) comes from the gam function in the package mgcv. A quick google of the error on StackOverflow[https://stats.stackexchange.com/questions/576273/gam-model-warning-message-step-failure--theta-estimation] gives us an idea of where this error comes from. We start by extracting the results of the gam fits: We can see that, in the .notes column, the second item is not empty (it does not have zero rows). We can check that it indeed contains the error we expected: We can now get the problematic data split, and extract its training data: In this case, there is nothing obvious that leads to the error (it is important to check that there are enough presences in each split; too few presences generally lead to errors). We can now extract the workflow and refit it to this split to confirm that we have isolated the problem: The next step would be to dig deeper into the data, trying to understand whether there are outliers that are problematic. The specific steps will depend on the algorithm that is giving problems.","code":"lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) set.seed(123) lacerta_thin <- lacerta_thin[sample( 1:nrow(lacerta_thin), nrow(lacerta_thin) / 5 ), ] lacerta_rec <- recipe(lacerta_thin, formula = class ~ .) 
%>% step_rm(all_of(c( \"bio01\", \"bio02\", \"bio03\", \"bio04\", \"bio07\", \"bio08\", \"bio09\", \"bio10\", \"bio11\", \"bio12\", \"bio14\", \"bio16\", \"bio17\", \"bio18\", \"bio19\", \"altitude\" ))) lacerta_models <- # create the workflow_set workflow_set( preproc = list(default = lacerta_rec), models = list( # the standard glm specs glm = sdm_spec_glm(), # the standard gam specs gam = sdm_spec_gam(), # rf specs with tuning rf = sdm_spec_rf() ), # make all combinations of preproc and models, cross = TRUE ) %>% # set formula for gams update_workflow_model(\"default_gam\", spec = sdm_spec_gam(), formula = gam_formula(lacerta_rec) ) %>% # tweak controls to store information needed later to create the ensemble option_add(control = control_ensemble_grid()) set.seed(100) lacerta_cv <- spatial_block_cv(lacerta_thin, v = 3) lacerta_models <- lacerta_models %>% workflow_map(\"tune_grid\", resamples = lacerta_cv, grid = 3, metrics = sdm_metric_set(), verbose = TRUE ) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 1 of 3 resampling: default_glm #> ✔ 1 of 3 resampling: default_glm (206ms) #> i No tuning parameters. `fit_resamples()` will be attempted #> i 2 of 3 resampling: default_gam #> → A | warning: Fitting terminated with step failure - check results carefully #> There were issues with some computations A: x1 #> There were issues with some computations A: x1 #> #> ✔ 2 of 3 resampling: default_gam (1.6s) #> i 3 of 3 tuning: default_rf #> i Creating pre-processing data to finalize unknown parameter: mtry #> ✔ 3 of 3 tuning: default_rf (475ms) gam_results <- extract_workflow_set_result(lacerta_models, id = \"default_gam\") gam_results #> # Resampling results #> # 3-fold spatial block cross-validation #> # A tibble: 3 × 5 #> splits id .metrics .notes .predictions #> #> 1 Fold1 #> 2 Fold2 #> 3 Fold3 #> #> There were issues with some computations: #> #> - Warning(s) x1: Fitting terminated with step failure - check results carefully #> #> Run `show_notes(.Last.tune.result)` for more information. gam_results$.notes[2] #> [[1]] #> # A tibble: 1 × 3 #> location type note #> #> 1 preprocessor 1/1, model 1/1 warning Fitting terminated with step failure - ch… problem_split <- gam_results$splits[2][[1]] summary(training(problem_split)) #> class geometry bio01 bio02 #> presence :18 POINT :63 Min. : 4.74 Min. : 6.737 #> pseudoabs:45 epsg:4326 : 0 1st Qu.:11.81 1st Qu.: 9.336 #> +proj=long...: 0 Median :13.09 Median :10.937 #> Mean :12.88 Mean :11.052 #> 3rd Qu.:14.82 3rd Qu.:12.649 #> Max. :17.87 Max. :14.037 #> bio03 bio04 bio05 bio06 #> Min. :34.30 Min. :341.2 Min. :19.90 Min. :-6.2732 #> 1st Qu.:39.30 1st Qu.:500.8 1st Qu.:24.91 1st Qu.:-0.6787 #> Median :40.55 Median :610.8 Median :28.59 Median : 1.1918 #> Mean :40.54 Mean :584.6 Mean :28.57 Mean : 1.2175 #> 3rd Qu.:42.19 3rd Qu.:656.1 3rd Qu.:32.31 3rd Qu.: 3.5664 #> Max. :46.98 Max. :756.7 Max. :35.31 Max. : 8.2344 #> bio07 bio08 bio09 bio10 #> Min. :16.40 Min. : 1.922 Min. : 1.588 Min. :12.86 #> 1st Qu.:23.32 1st Qu.: 7.716 1st Qu.:16.995 1st Qu.:18.53 #> Median :27.88 Median : 9.668 Median :19.828 Median :20.51 #> Mean :27.35 Mean : 9.450 Mean :18.938 Mean :20.48 #> 3rd Qu.:31.49 3rd Qu.:11.341 3rd Qu.:22.607 3rd Qu.:23.08 #> Max. :35.27 Max. :16.882 Max. :25.470 Max. :25.71 #> bio11 bio12 bio13 bio14 #> Min. :-2.060 Min. : 249.0 Min. : 36.0 Min. 
: 2.00 #> 1st Qu.: 4.968 1st Qu.: 452.0 1st Qu.: 59.0 1st Qu.: 8.00 #> Median : 6.236 Median : 628.0 Median : 91.0 Median :17.00 #> Mean : 6.268 Mean : 757.8 Mean :101.5 Mean :21.97 #> 3rd Qu.: 8.455 3rd Qu.:1016.5 3rd Qu.:119.0 3rd Qu.:30.50 #> Max. :11.795 Max. :1622.0 Max. :248.0 Max. :74.00 #> bio15 bio16 bio17 bio18 #> Min. :13.44 Min. : 96.0 Min. : 17.00 Min. : 22.0 #> 1st Qu.:30.07 1st Qu.:157.0 1st Qu.: 43.00 1st Qu.: 47.0 #> Median :38.97 Median :249.0 Median : 71.00 Median : 78.0 #> Mean :41.58 Mean :280.3 Mean : 88.08 Mean : 96.0 #> 3rd Qu.:54.30 3rd Qu.:334.0 3rd Qu.:109.50 3rd Qu.:117.5 #> Max. :71.59 Max. :714.0 Max. :253.00 Max. :253.0 #> bio19 altitude #> Min. : 68.0 Min. : 38.0 #> 1st Qu.:128.5 1st Qu.: 319.5 #> Median :225.0 Median : 689.0 #> Mean :252.5 Mean : 685.5 #> 3rd Qu.:319.5 3rd Qu.: 855.0 #> Max. :714.0 Max. :1926.0 gam_workflow <- extract_workflow(lacerta_models, id = \"default_gam\") faulty_gam <- fit(gam_workflow, training(problem_split)) #> Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L, #> : Fitting terminated with step failure - check results carefully"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Michela Leonardi. Author. Margherita Colucci. Author. Andrea Vittorio Pozzi. Author. Eleanor M.L. Scerri. Author. Andrea Manica. Author, maintainer.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Leonardi M, Colucci M, Pozzi A, Scerri E, Manica A (2024). tidysdm: Species Distribution Models with Tidymodels. R package version 0.9.6.9002, https://evolecolgroup.github.io/tidysdm/, https://github.com/EvolEcolGroup/tidysdm.","code":"@Manual{, title = {tidysdm: Species Distribution Models with Tidymodels}, author = {Michela Leonardi and Margherita Colucci and Andrea Vittorio Pozzi and Eleanor M.L. Scerri and Andrea Manica}, year = {2024}, note = {R package version 0.9.6.9002, https://evolecolgroup.github.io/tidysdm/}, url = {https://github.com/EvolEcolGroup/tidysdm}, }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/index.html","id":"tidysdm-","dir":"","previous_headings":"","what":"Species Distribution Models with Tidymodels","title":"Species Distribution Models with Tidymodels","text":"The goal of tidysdm is to implement Species Distribution Models using the tidymodels framework. The advantage of tidymodels is that the model syntax and the results returned to the user are standardised, thus providing a coherent interface to modelling. Given the variety of models required for SDM, tidymodels is an ideal framework. tidysdm provides a number of wrappers and specialised functions to facilitate the fitting of SDM with tidymodels. Besides modelling contemporary species, tidysdm has a number of functions specifically designed to work with palaeontological data. Whilst users are free to use their own environmental data, the articles showcase the potential integration with pastclim, which helps with downloading and manipulating present day data, future predictions, and palaeoclimate reconstructions. An overview of the capabilities of tidysdm is given in Leonardi et al. (2023).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Species Distribution Models with Tidymodels","text":"tidysdm is on CRAN, and the easiest way to install it is with: The version on CRAN is recommended for every day use. New features and bug fixes appear first on the dev branch of GitHub, before they make their way onto CRAN. 
If you need early access to the new features, you can install tidysdm directly from GitHub. To install from GitHub, you will need to use devtools; if you haven't done so already, get it from CRAN with install.packages(\"devtools\"). You can then install the latest dev version of tidysdm from GitHub with:","code":"install.packages(\"tidysdm\") # install.packages(\"devtools\") devtools::install_github(\"EvolEcolGroup/tidysdm\", ref = \"dev\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/index.html","id":"overview-of-functionality","dir":"","previous_headings":"","what":"Overview of functionality","title":"Species Distribution Models with Tidymodels","text":"On its dedicated website, you can find Articles giving you a step-by-step overview of fitting SDMs for contemporary species, as well as an equivalent tutorial using palaeontological data. Furthermore, there is an Article with examples of how to leverage various features of tidymodels that are not commonly adopted in SDM pipelines. There is also a dev version of the site updated for the dev branch of tidysdm (on the top left of the dev website, the version number is in red and in the format x.x.x.9xxx, indicating that it is a development version). If you want to contribute, make sure to read our contributing guide.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/index.html","id":"when-something-does-not-work","dir":"","previous_headings":"","what":"When something does not work","title":"Species Distribution Models with Tidymodels","text":"Did you get an error while trying to fit a model? As tidysdm is a relatively new package, it might well be that, if you get an error, you have encountered a bug. However, it is also possible that you have misspecified your model (the error then comes from tidymodels, as the model is not valid). We have prepared an Article on how to diagnose failing models. It is not a fully comprehensive list of everything that could go wrong, but it will hopefully give you some ideas on how to dig deeper into what is going wrong. You should also check the issues on GitHub to see whether the problem has already been reported. If you are convinced that the problem is a bug in tidysdm, feel free to create a new issue. Please make sure you have updated to the latest version of tidysdm, as well as updating all other packages on your system, and provide a reproducible example for the developers to investigate the problem. If you think you can help fixing the bug, read our contributing guide.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_member.html","id":null,"dir":"Reference","previous_headings":"","what":"Add best member of workflow to a simple ensemble — add_member","title":"Add best member of workflow to a simple ensemble — add_member","text":"This function adds member(s) to a simple_ensemble() object, taking the best member from each workflow provided. It is possible to pass individual tune_results objects from a tuned workflow, or a workflowsets::workflow_set().","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_member.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add best member of workflow to a simple ensemble — add_member","text":"","code":"add_member(x, member, ...) # Default S3 method add_member(x, member, ...) # S3 method for class 'tune_results' add_member(x, member, metric = NULL, id = NULL, ...) # S3 method for class 'workflow_set' add_member(x, member, metric = NULL, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_member.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add best member of workflow to a simple ensemble — add_member","text":"x: a simple_ensemble to which the member(s) will be added. member: a tune_results, or a workflowsets::workflow_set. ...: not used at the moment. metric: a character string (or NULL) for which metric to optimize. If NULL, the first metric is used. 
id: the name to be given to the workflow in the wflow_id column.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_member.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add best member of workflow to a simple ensemble — add_member","text":"A simple_ensemble with the additional member(s).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_repeat.html","id":null,"dir":"Reference","previous_headings":"","what":"Add repeat(s) to a repeated ensemble — add_repeat","title":"Add repeat(s) to a repeated ensemble — add_repeat","text":"This function adds repeat(s) to a repeat_ensemble object, where each repeat is a simple_ensemble. All repeats must contain the same members, selected using the same metric.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_repeat.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add repeat(s) to a repeated ensemble — add_repeat","text":"","code":"add_repeat(x, rep, ...) # Default S3 method add_repeat(x, rep, ...) # S3 method for class 'simple_ensemble' add_repeat(x, rep, ...) # S3 method for class 'list' add_repeat(x, rep, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_repeat.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add repeat(s) to a repeated ensemble — add_repeat","text":"x: a repeat_ensemble to which the repeat(s) will be added. rep: a repeat, either a single simple_ensemble, or a list of simple_ensemble objects. ...: not used at the moment.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/add_repeat.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add repeat(s) to a repeated ensemble — add_repeat","text":"A repeat_ensemble with the additional repeat(s).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.simple_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot the results of a simple ensemble — autoplot.simple_ensemble","title":"Plot the results of a simple ensemble — autoplot.simple_ensemble","text":"This autoplot() method plots the performance metrics, ranked using a metric.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.simple_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot the results of a simple ensemble — autoplot.simple_ensemble","text":"","code":"# S3 method for class 'simple_ensemble' autoplot( object, rank_metric = NULL, metric = NULL, std_errs = stats::qnorm(0.95), ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.simple_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot the results of a simple ensemble — autoplot.simple_ensemble","text":"object: a simple_ensemble whose elements have results. rank_metric: a character string for which metric should be used to rank the results. If none is given, the first metric in the metric set is used (after filtering by the metric option). metric: a character vector for which metrics (apart from rank_metric) to be included in the visualization. If NULL (the default), all available metrics are plotted. std_errs: the number of standard errors to plot (if the standard error exists). ...: other options to pass to autoplot(). 
Currently unused.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.simple_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Plot the results of a simple ensemble — autoplot.simple_ensemble","text":"A ggplot object.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.simple_ensemble.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Plot the results of a simple ensemble — autoplot.simple_ensemble","text":"This function is intended to produce a default plot to visualize helpful information across all possible applications of a simple_ensemble. More sophisticated plots can be produced using standard ggplot2 code for plotting. The x-axis is the workflow rank in the set (a value of one being the best) versus the performance metric(s) on the y-axis. With multiple metrics, there are facets for each metric, with the rank_metric first (if provided; otherwise the metric used to create the simple_ensemble is used). If multiple resamples are used, confidence bounds are shown for each result (95% confidence, by default).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.simple_ensemble.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot the results of a simple ensemble — autoplot.simple_ensemble","text":"","code":"# \\donttest{ # we use the two_class_example from `workflowsets` two_class_ens <- simple_ensemble() %>% add_member(two_class_res, metric = \"roc_auc\") #> #> Attaching package: ‘plotrix’ #> The following object is masked from ‘package:scales’: #> #> rescale autoplot(two_class_ens) # }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.spatial_initial_split.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a ggplot for a spatial initial rsplit. — autoplot.spatial_initial_split","title":"Create a ggplot for a spatial initial rsplit. — autoplot.spatial_initial_split","text":"This method provides a good visualization method for a spatial initial rsplit.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.spatial_initial_split.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a ggplot for a spatial initial rsplit. — autoplot.spatial_initial_split","text":"","code":"# S3 method for class 'spatial_initial_split' autoplot(object, ..., alpha = 0.6)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.spatial_initial_split.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a ggplot for a spatial initial rsplit. — autoplot.spatial_initial_split","text":"object: a spatial_initial_rsplit object. Note that resamples must be made from sf objects to create spatial_initial_rsplit objects; this function will not work with resamples made from non-spatial tibbles or data.frames. ...: options passed to ggplot2::geom_sf(). alpha: opacity, passed to ggplot2::geom_sf(). Values of alpha range from 0 to 1, with lower values corresponding to more transparent colors.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.spatial_initial_split.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a ggplot for a spatial initial rsplit. — autoplot.spatial_initial_split","text":"A ggplot object with each fold assigned a color, made using ggplot2::geom_sf().","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.spatial_initial_split.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Create a ggplot for a spatial initial rsplit. 
— autoplot.spatial_initial_split","text":"This plot method is a wrapper around the standard spatial_rsplit method, but it re-labels the folds as Testing and Training, following the convention for a standard initial_split object.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/autoplot.spatial_initial_split.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create a ggplot for a spatial initial rsplit. — autoplot.spatial_initial_split","text":"","code":"set.seed(123) block_initial <- spatial_initial_split(boston_canopy, prop = 1 / 5, spatial_block_cv ) autoplot(block_initial)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/blockcv2rsample.html","id":null,"dir":"Reference","previous_headings":"","what":"Convert an object created with blockCV to an rsample object — blockcv2rsample","title":"Convert an object created with blockCV to an rsample object — blockcv2rsample","text":"This function converts objects created with blockCV into rsample objects that can be used by tidysdm. blockCV provides more sophisticated sampling options than the spatialsample library. For example, it is possible to stratify the sampling to ensure that presences and absences are evenly distributed among the folds (see the example below).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/blockcv2rsample.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Convert an object created with blockCV to an rsample object — blockcv2rsample","text":"","code":"blockcv2rsample(x, data)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/blockcv2rsample.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Convert an object created with blockCV to an rsample object — blockcv2rsample","text":"x: an object created with a blockCV function. data: the sf object used to create x.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/blockcv2rsample.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Convert an object created with blockCV to an rsample object — blockcv2rsample","text":"An rsample object.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/blockcv2rsample.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Convert an object created with blockCV to an rsample object — blockcv2rsample","text":"Note that currently only objects of type cv_spatial and cv_cluster are supported.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/blockcv2rsample.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Convert an object created with blockCV to an rsample object — blockcv2rsample","text":"","code":"# \\donttest{ library(blockCV) #> blockCV 3.1.4 points <- read.csv(system.file(\"extdata/\", \"species.csv\", package = \"blockCV\")) pa_data <- sf::st_as_sf(points, coords = c(\"x\", \"y\"), crs = 7845) sb1 <- cv_spatial( x = pa_data, column = \"occ\", # the response column to balance the folds k = 5, # number of folds size = 350000, # size of the blocks in metres selection = \"random\", # random blocks-to-fold iteration = 10 ) # find evenly dispersed folds #> | | | 0% | |======= | 10% | |============== | 20% | |===================== | 30% | |============================ | 40% | |=================================== | 50% | |========================================== | 60% | |================================================= | 70% | |======================================================== | 80% | 
|=============================================================== | 90% | |======================================================================| 100% #> train_0 train_1 test_0 test_1 #> 1 172 207 85 36 #> 2 218 202 39 41 #> 3 218 192 39 51 #> 4 217 171 40 72 #> 5 203 200 54 43 sb1_rsample <- blockcv2rsample(sb1, pa_data) class(sb1_rsample) #> [1] \"spatial_rset\" \"rset\" \"tbl_df\" \"tbl\" \"data.frame\" autoplot(sb1_rsample) # }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":null,"dir":"Reference","previous_headings":"","what":"Boyce continuous index (BCI) — boyce_cont","title":"Boyce continuous index (BCI) — boyce_cont","text":"function Boyce Continuous Index, measure model accuracy appropriate Species Distribution Models presence data (.e. using pseudoabsences background). algorithm used comes package enmSdm, uses multiple overlapping windows.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Boyce continuous index (BCI) — boyce_cont","text":"","code":"boyce_cont(data, ...) # S3 method for class 'data.frame' boyce_cont( data, truth, ..., estimator = NULL, na_rm = TRUE, event_level = \"first\", case_weights = NULL ) # S3 method for class 'sf' boyce_cont(data, ...) boyce_cont_vec( truth, estimate, estimator = NULL, na_rm = TRUE, event_level = \"first\", case_weights = NULL, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Boyce continuous index (BCI) — boyce_cont","text":"data Either data.frame containing columns specified truth estimate arguments, table/matrix true class results columns table. ... set unquoted column names one dplyr selector functions choose variables contain class probabilities. truth binary, 1 column selected, correspond value event_level. Otherwise, many columns factor levels truth ordering columns factor levels truth. truth column identifier true class results (factor). unquoted column name although argument passed expression supports quasiquotation (can unquote column names). _vec() functions, factor vector. estimator One \"binary\", \"hand_till\", \"macro\", \"macro_weighted\" specify type averaging done. \"binary\" relevant two class case. others general methods calculating multiclass metrics. default automatically choose \"binary\" truth binary, \"hand_till\" truth >2 levels case_weights specified, \"macro\" truth >2 levels case_weights specified (case \"hand_till\" well-defined). na_rm logical value indicating whether NA values stripped computation proceeds. event_level single string. Either \"first\" \"second\" specify level truth consider \"event\". argument applicable estimator = \"binary\". default uses internal helper generally defaults \"first\" case_weights optional column identifier case weights. unquoted column name evaluates numeric column data. _vec() functions, numeric vector. estimate truth binary, numeric vector class probabilities corresponding \"relevant\" class. Otherwise, matrix many columns factor levels truth. assumed order levels truth.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Boyce continuous index (BCI) — boyce_cont","text":"tibble columns .metric, .estimator, .estimate 1 row values. 
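As an illustrative aside, a minimal sketch (hypothetical toy vectors, not package data) of the boyce_cont_vec() interface, where truth is a factor with presences as the first level and estimate is a vector of predicted suitabilities:

library(tidysdm)
set.seed(123)
# hypothetical observed classes, presences first (the tidysdm convention)
obs <- factor(rep(c("presence", "absence"), each = 50),
  levels = c("presence", "absence")
)
# hypothetical suitability scores, higher on average for presences
scores <- c(runif(50, min = 0.4, max = 1), runif(50, min = 0, max = 0.6))
boyce_cont_vec(truth = obs, estimate = scores)
# values close to 1 indicate predictions consistent with the observed presences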
grouped data frames, number rows returned number groups.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Boyce continuous index (BCI) — boyce_cont","text":"multiclass version function, operates binary predictions (e.g. presences absences SDMs).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Boyce continuous index (BCI) — boyce_cont","text":"Boyce, M.S., P.R. Vernier, S.E. Nielsen F.K.. Schmiegelow. 2002. Evaluating resource selection functions. Ecol. Model., 157, 281-300. Hirzel, .H., G. Le Lay, V. Helfer, C. Randin . Guisan. 2006. Evaluating ability habitat suitability models predict species presences. Ecol. Model., 199, 142-152.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/boyce_cont.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Boyce continuous index (BCI) — boyce_cont","text":"","code":"boyce_cont(two_class_example, truth, Class1) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 boyce_cont binary 0.805"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/calib_class_thresh.html","id":null,"dir":"Reference","previous_headings":"","what":"Calibrate class thresholds — calib_class_thresh","title":"Calibrate class thresholds — calib_class_thresh","text":"Predict new dataset using simple ensemble. Predictions individual models combined according fun","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/calib_class_thresh.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calibrate class thresholds — calib_class_thresh","text":"","code":"calib_class_thresh(object, class_thresh, metric_thresh = NULL)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/calib_class_thresh.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calibrate class thresholds — calib_class_thresh","text":"object simple_ensemble object class_thresh probability threshold used convert probabilities classes. can number (0 1), character metric (currently \"tss_max\", \"kap_max\" \"sensitivity\"). sensitivity, additional target value passed along second element vector, e.g. c(\"sensitivity\",0.8). metric_thresh vector length 2 giving metric threshold, used prune models ensemble used prediction. 'metrics' need computed workflow tuned. metric's threshold needs match value used prediction. 
Examples c(\"accuracy\",0.8) c(\"boyce_cont\",0.7).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/calib_class_thresh.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calibrate class thresholds — calib_class_thresh","text":"simple_ensemble object","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/calib_class_thresh.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calibrate class thresholds — calib_class_thresh","text":"","code":"test_ens <- simple_ensemble() %>% add_member(two_class_res[1:3, ], metric = \"roc_auc\") test_ens <- calib_class_thresh(test_ens, class_thresh = \"tss_max\") test_ens <- calib_class_thresh(test_ens, class_thresh = \"kap_max\") test_ens <- calib_class_thresh(test_ens, class_thresh = c(\"sens\", 0.9))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_coords_names.html","id":null,"dir":"Reference","previous_headings":"","what":"Check that we have a valid pair of coordinate names — check_coords_names","title":"Check that we have a valid pair of coordinate names — check_coords_names","text":"internal function checks coords (passed functions) valid set names, , NULL, standard variable names data","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_coords_names.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check that we have a valid pair of coordinate names — check_coords_names","text":"","code":"check_coords_names(data, coords)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_coords_names.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check that we have a valid pair of coordinate names — check_coords_names","text":"data data.frame containing locations. coords vector length two giving names \"x\" \"y\" coordinates, points data.frame use standard names.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_coords_names.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check that we have a valid pair of coordinate names — check_coords_names","text":"vector length 2 valid names, correct order","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_sdm_presence.html","id":null,"dir":"Reference","previous_headings":"","what":"Check that the column with presences is correctly formatted — check_sdm_presence","title":"Check that the column with presences is correctly formatted — check_sdm_presence","text":"tidysdm, string defining presences first level response factor. 
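As an illustrative aside, a minimal sketch (hypothetical toy tibble) of this check; the presence column is passed unquoted, as in the other tidysdm verbs:

library(tidysdm)
# hypothetical data: the response factor lists the presence class first
toy <- tibble::tibble(
  class = factor(c("presence", "pseudoabs", "presence"),
    levels = c("presence", "pseudoabs")
  ),
  bio01 = c(10.2, 12.4, 11.1)
)
check_sdm_presence(toy, class, presence_level = "presence")
# returns TRUE here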
function checks column correctly formatted.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_sdm_presence.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check that the column with presences is correctly formatted — check_sdm_presence","text":"","code":"check_sdm_presence(.data, .col, presence_level = \"presence\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_sdm_presence.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check that the column with presences is correctly formatted — check_sdm_presence","text":".data data.frame tibble, derived object sf data.frame .col column containing presences presence_level string used define presence level .col","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_sdm_presence.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check that the column with presences is correctly formatted — check_sdm_presence","text":"TRUE correctly formatted","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_splits_balance.html","id":null,"dir":"Reference","previous_headings":"","what":"Check the balance of presences vs pseudoabsences among splits — check_splits_balance","title":"Check the balance of presences vs pseudoabsences among splits — check_splits_balance","text":"Check balance presences vs pseudoabsences among splits","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_splits_balance.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check the balance of presences vs pseudoabsences among splits — check_splits_balance","text":"","code":"check_splits_balance(splits, .col)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_splits_balance.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check the balance of presences vs pseudoabsences among splits — check_splits_balance","text":"splits data splits (rset split object), generated function spatialsample::spatial_block_cv() .col column containing presences","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_splits_balance.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check the balance of presences vs pseudoabsences among splits — check_splits_balance","text":"tibble number presences pseudoabsences assessment analysis set split (training testing initial split)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/check_splits_balance.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Check the balance of presences vs pseudoabsences among splits — check_splits_balance","text":"","code":"lacerta_thin <- readRDS(system.file(\"extdata/lacerta_climate_sf.RDS\", package = \"tidysdm\" )) lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5) check_splits_balance(lacerta_cv, class) #> # A tibble: 5 × 4 #> presence_assessment pseudoabs_assessment presence_analysis pseudoabs_analysis #> #> 1 80 273 33 66 #> 2 80 283 33 56 #> 3 97 272 16 67 #> 4 94 262 19 77 #> 5 101 267 12 72"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/clamp_predictors.html","id":null,"dir":"Reference","previous_headings":"","what":"Clamp the predictors to match values in training set — clamp_predictors","title":"Clamp the predictors to match values in training set — 
clamp_predictors","text":"function clamps environmental variables terra::SpatRaster terra::SpatRasterDataset minimum maximum values exceed range training dataset.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/clamp_predictors.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Clamp the predictors to match values in training set — clamp_predictors","text":"","code":"clamp_predictors(x, training, .col, use_na) # Default S3 method clamp_predictors(x, training, .col, use_na) # S3 method for class 'SpatRaster' clamp_predictors(x, training, .col, use_na = FALSE) # S3 method for class 'SpatRasterDataset' clamp_predictors(x, training, .col, use_na = FALSE)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/clamp_predictors.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Clamp the predictors to match values in training set — clamp_predictors","text":"x terra::SpatRaster terra::SpatRasterDataset clamp. training training dataset (data.frame sf::sf object. .col column containing presences (optional). specified, excluded clamping. use_na boolean determining whether values outside range training dataset removed (set NA). FALSE (default), values outside training range replaced extremes training range.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/clamp_predictors.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Clamp the predictors to match values in training set — clamp_predictors","text":"terra::SpatRaster terra::SpatRasterDataset clamped ranges training","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/collect_metrics.simple_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","title":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","text":"Return tibble performance metrics models.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/collect_metrics.simple_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","text":"","code":"# S3 method for class 'simple_ensemble' collect_metrics(x, ...) # S3 method for class 'repeat_ensemble' collect_metrics(x, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/collect_metrics.simple_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","text":"x simple_ensemble repeat_ensemble object ... 
currently used.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/collect_metrics.simple_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","text":"tibble.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/collect_metrics.simple_ensemble.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","text":"applied ensemble, metrics returned contain actual tuning parameter columns values (unlike collect functions run objects). reason ensembles contain different types models models different tuning parameters.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/collect_metrics.simple_ensemble.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Obtain and format results produced by tuning functions for ensemble objects — collect_metrics.simple_ensemble","text":"","code":"collect_metrics(lacerta_ensemble) #> # A tibble: 12 × 5 #> wflow_id .metric mean std_err n #> #> 1 default_glm boyce_cont 0.683 0.0573 3 #> 2 default_glm roc_auc 0.789 0.0137 3 #> 3 default_glm tss_max 0.533 0.0192 3 #> 4 default_rf boyce_cont 0.632 0.0552 3 #> 5 default_rf roc_auc 0.791 0.0146 3 #> 6 default_rf tss_max 0.519 0.0284 3 #> 7 default_gbm boyce_cont 0.738 0.0473 3 #> 8 default_gbm roc_auc 0.792 0.00304 3 #> 9 default_gbm tss_max 0.545 0.0368 3 #> 10 default_maxent boyce_cont 0.832 0.0771 3 #> 11 default_maxent roc_auc 0.832 0.00649 3 #> 12 default_maxent tss_max 0.595 0.0124 3 collect_metrics(lacerta_rep_ens) #> # A tibble: 18 × 6 #> rep_id wflow_id .metric mean std_err n #> #> 1 rep_01 default_glm boyce_cont 0.796 0.0241 5 #> 2 rep_01 default_glm roc_auc 0.974 0.00385 5 #> 3 rep_01 default_glm tss_max 0.881 0.0155 5 #> 4 rep_01 default_maxent boyce_cont 0.834 0.0603 5 #> 5 rep_01 default_maxent roc_auc 0.981 0.0129 5 #> 6 rep_01 default_maxent tss_max 0.914 0.0345 5 #> 7 rep_02 default_glm boyce_cont 0.716 0.0520 5 #> 8 rep_02 default_glm roc_auc 0.940 0.0268 5 #> 9 rep_02 default_glm tss_max 0.813 0.0543 5 #> 10 rep_02 default_maxent boyce_cont 0.808 0.0745 5 #> 11 rep_02 default_maxent roc_auc 0.982 0.00807 5 #> 12 rep_02 default_maxent tss_max 0.883 0.0270 5 #> 13 rep_03 default_glm boyce_cont 0.402 0.179 5 #> 14 rep_03 default_glm roc_auc 0.940 0.0202 5 #> 15 rep_03 default_glm tss_max 0.821 0.0330 5 #> 16 rep_03 default_maxent boyce_cont 0.908 0.0312 5 #> 17 rep_03 default_maxent roc_auc 0.976 0.00727 5 #> 18 rep_03 default_maxent tss_max 0.854 0.0345 5"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/conf_matrix_df.html","id":null,"dir":"Reference","previous_headings":"","what":"Make a confusion matrix dataframe for multiple thresholds — conf_matrix_df","title":"Make a confusion matrix dataframe for multiple thresholds — conf_matrix_df","text":"Create confusion matrix multiple thresholds, using optimise tss","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/conf_matrix_df.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Make a confusion matrix dataframe for multiple thresholds — conf_matrix_df","text":"","code":"conf_matrix_df(presences, 
absences)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/conf_matrix_df.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Make a confusion matrix dataframe for multiple thresholds — conf_matrix_df","text":"presences Probabilities presences absences probabilities absences","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/conf_matrix_df.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Make a confusion matrix dataframe for multiple thresholds — conf_matrix_df","text":"data.frame thresholds columns thres, tp, fp, fn, tn","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/control_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Control wrappers — control_ensemble_grid","title":"Control wrappers — control_ensemble_grid","text":"Supply light wrappers control argument tune::tune_grid(), tune::tune_bayes(), tune::fit_resamples() call return needed elements use ensemble. functions return appropriate control grid ensure assessment set predictions information model specifications preprocessors, supplied resampling results object! integrate ensemble settings existing control settings, note functions just call appropriate tune::control_* function arguments save_pred = TRUE, save_workflow = TRUE. wrappers equivalent ones used stacks package.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/control_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Control wrappers — control_ensemble_grid","text":"","code":"control_ensemble_grid() control_ensemble_resamples() control_ensemble_bayes()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/control_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Control wrappers — control_ensemble_grid","text":"tune::control_grid, tune::control_bayes, tune::control_resamples object.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/dist_pres_vs_bg.html","id":null,"dir":"Reference","previous_headings":"","what":"Distance between the distribution of climate values for presences vs background — dist_pres_vs_bg","title":"Distance between the distribution of climate values for presences vs background — dist_pres_vs_bg","text":"environmental variable, function computes density functions presences absences returns (1-overlap), measure distance two distributions. 
Variables high distance good candidates SDMs, species occurrences confined subset available background.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/dist_pres_vs_bg.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Distance between the distribution of climate values for presences vs background — dist_pres_vs_bg","text":"","code":"dist_pres_vs_bg(.data, .col)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/dist_pres_vs_bg.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Distance between the distribution of climate values for presences vs background — dist_pres_vs_bg","text":".data data.frame (derived object, tibble, sf) values bioclimate variables presences background .col column containing presences; assumes presences first level factor","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/dist_pres_vs_bg.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Distance between the distribution of climate values for presences vs background — dist_pres_vs_bg","text":"name vector distances","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/dist_pres_vs_bg.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Distance between the distribution of climate values for presences vs background — dist_pres_vs_bg","text":"","code":"# This should be updated to use a dataset from tidysdm data(\"bradypus\", package = \"maxnet\") bradypus_tb <- tibble::as_tibble(bradypus) %>% dplyr::mutate(presence = relevel( factor( dplyr::case_match(presence, 1 ~ \"presence\", 0 ~ \"absence\") ), ref = \"presence\" )) %>% select(-ecoreg) bradypus_tb %>% dist_pres_vs_bg(presence) #> pre6190_l10 frs6190_ann tmn6190_ann pre6190_ann vap6190_ann pre6190_l7 #> 0.4366602 0.4299480 0.4295013 0.4096230 0.3945855 0.3933454 #> h_dem tmp6190_ann dtr6190_ann pre6190_l4 tmx6190_ann cld6190_ann #> 0.3647375 0.3316686 0.3288771 0.2544976 0.2418274 0.1812527 #> pre6190_l1 #> 0.1297035"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/explain_tidysdm.html","id":null,"dir":"Reference","previous_headings":"","what":"Create explainer from your tidysdm ensembles. — explain_tidysdm","title":"Create explainer from your tidysdm ensembles. — explain_tidysdm","text":"DALEX designed explore explain behaviour Machine Learning methods. function creates DALEX explainer (see DALEX::explain()), can queried multiple function create explanations model.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/explain_tidysdm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create explainer from your tidysdm ensembles. 
— explain_tidysdm","text":"","code":"explain_tidysdm( model, data, y, predict_function, predict_function_target_column, residual_function, ..., label, verbose, precalculate, colorize, model_info, type, by_workflow ) # Default S3 method explain_tidysdm( model, data = NULL, y = NULL, predict_function = NULL, predict_function_target_column = NULL, residual_function = NULL, ..., label = NULL, verbose = TRUE, precalculate = TRUE, colorize = !isTRUE(getOption(\"knitr.in.progress\")), model_info = NULL, type = \"classification\", by_workflow = FALSE ) # S3 method for class 'simple_ensemble' explain_tidysdm( model, data = NULL, y = NULL, predict_function = NULL, predict_function_target_column = NULL, residual_function = NULL, ..., label = NULL, verbose = TRUE, precalculate = TRUE, colorize = !isTRUE(getOption(\"knitr.in.progress\")), model_info = NULL, type = \"classification\", by_workflow = FALSE ) # S3 method for class 'repeat_ensemble' explain_tidysdm( model, data = NULL, y = NULL, predict_function = NULL, predict_function_target_column = NULL, residual_function = NULL, ..., label = NULL, verbose = TRUE, precalculate = TRUE, colorize = !isTRUE(getOption(\"knitr.in.progress\")), model_info = NULL, type = \"classification\", by_workflow = FALSE )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/explain_tidysdm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create explainer from your tidysdm ensembles. — explain_tidysdm","text":"model object - model explained data data.frame matrix - data used calculate explanations. provided, extracted model. Data passed without target column (shall provided y argument). NOTE: target variable present data, functionalities may work properly. y numeric vector outputs/scores. provided, shall size data predict_function function takes two arguments: model new data returns numeric vector predictions. default yhat. predict_function_target_column Character numeric containing either column name column number model prediction object class considered positive (.e. class associated probability 1). NULL, second column output taken binary classification. multiclass classification setting, parameter cause switch binary classification mode one vs others probabilities. residual_function function takes four arguments: model, data, target vector y predict function (optionally). return numeric vector model residuals given data. provided, response residuals (\\(y-\\hat{y}\\)) calculated. default residual_function_default. ... parameters label character - name model. default extracted 'class' attribute model verbose logical. TRUE (default) diagnostic messages printed precalculate logical. TRUE (default) predicted_values residual calculated explainer created. happen also verbose TRUE. Set verbose precalculate FALSE omit calculations. colorize logical. TRUE (default) WARNINGS, ERRORS NOTES colorized. work R console. Now default FALSE knitting TRUE otherwise. model_info named list (package, version, type) containing information model. NULL, DALEX seek information . type type model, either classification regression. specified type extracted model_info. by_workflow boolean determining whether list explainer, one per model, returned instead single explainer ensemble","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/explain_tidysdm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create explainer from your tidysdm ensembles. 
— explain_tidysdm","text":"explainer object DALEX::explain ready work DALEX","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/explain_tidysdm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create explainer from your tidysdm ensembles. — explain_tidysdm","text":"","code":"# \\donttest{ # using the whole ensemble lacerta_explainer <- explain_tidysdm(tidysdm::lacerta_ensemble) #> Preparation of a new explainer is initiated #> -> model label : data.frame ( default ) #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : predict_function #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidysdm , ver. 0.9.6.9002 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.01490969 , mean = 0.2861937 , max = 0.7169324 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.6465921 , mean = -0.03619367 , max = 0.7891973 #> A new explainer has been created! # by workflow explainer_list <- explain_tidysdm(tidysdm::lacerta_ensemble, by_workflow = TRUE ) #> Preparation of a new explainer is initiated #> -> model label : default_glm #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.2280177 , mean = 0.75 , max = 0.9854359 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.9096205 , mean = 5.395921e-12 , max = 0.7719823 #> A new explainer has been created! #> Preparation of a new explainer is initiated #> -> model label : default_rf #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.1315421 , mean = 0.7480648 , max = 1 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.6878921 , mean = 0.001935171 , max = 0.5870619 #> A new explainer has been created! #> Preparation of a new explainer is initiated #> -> model label : default_gbm #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.3390188 , mean = 0.7314788 , max = 0.9632964 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.9268645 , mean = 0.01852121 , max = 0.6280424 #> A new explainer has been created! 
#> Preparation of a new explainer is initiated #> -> model label : default_maxent #> -> data : 444 rows 4 cols #> -> data : tibble converted into a data.frame #> -> target variable : 444 values #> -> predict function : yhat.workflow will be used ( default ) #> -> predicted values : No value for predict function target column. ( default ) #> -> model_info : package tidymodels , ver. 1.2.0 , task classification ( default ) #> -> model_info : type set to classification #> -> predicted values : numerical, min = 0.1095764 , mean = 0.6256817 , max = 0.9960248 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.8207859 , mean = 0.1243183 , max = 0.8904236 #> A new explainer has been created! # }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":null,"dir":"Reference","previous_headings":"","what":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"Compute multivariate environmental similarity surfaces (MESS), described Elith et al., 2010.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"","code":"extrapol_mess(x, training, .col, ...) # Default S3 method extrapol_mess(x, training, ...) # S3 method for class 'SpatRaster' extrapol_mess(x, training, .col, filename = \"\", ...) # S3 method for class 'data.frame' extrapol_mess(x, training, .col, ...) # S3 method for class 'SpatRasterDataset' extrapol_mess(x, training, .col, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"x terra::SpatRaster, terra::SpatRasterDataset data.frame training matrix data.frame sf object containing reference values; column correspond one layer terra::SpatRaster object, exception presences column defined .col (optional). .col column containing presences (optional). specified, excluded computing MESS scores. ... additional arguments terra::writeRaster() filename character. Output filename (optional)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"terra::SpatRaster (data.frame) MESS values.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"function modified version mess package predicts, method added work terra::SpatRasterDataset. Note method terra::SpatRasterDataset assumes variables stored terra::SpatRaster time information within x. Time also assumed years. 
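As an illustrative aside, a minimal sketch (hypothetical toy data frames) of the data.frame method, where negative MESS values flag locations outside the training range:

library(tidysdm)
set.seed(123)
# hypothetical training climate
train_env <- data.frame(
  bio01 = runif(50, min = 5, max = 15),
  bio12 = runif(50, min = 300, max = 900)
)
# hypothetical new locations; the first and last fall outside the ranges above
new_env <- data.frame(
  bio01 = c(4, 10, 20),
  bio12 = c(250, 600, 1000)
)
extrapol_mess(new_env, training = train_env)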
conditions met, possible manually extract terra::SpatRaster time step, use extrapol_mess terra::SpatRasters","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"Elith J., M. Kearney M., S. Phillips, 2010. art modelling range-shifting species. Methods Ecology Evolution 1:330-342.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/extrapol_mess.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Multivariate environmental similarity surfaces (MESS) — extrapol_mess","text":"Jean-Pierre Rossi, Robert Hijmans, Paulo van Breugel, Andrea Manica","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_collinear.html","id":null,"dir":"Reference","previous_headings":"","what":"Filter to retain only variables that have low collinearity — filter_collinear","title":"Filter to retain only variables that have low collinearity — filter_collinear","text":"method finds subset variables low collinearity. provides three methods: cor_caret, stepwise approach remove variables pairwise correlation given cutoff, choosing variable greatest mean correlation (based algorithm caret::findCorrelation); vif_step, stepwise approach remove variables variance inflation factor given cutoff (based algorithm usdm::vifstep), vif_cor, stepwise approach , step, find pair variables highest correlation cutoff removes one largest vif. correlation certain cutoff. methods terra::SpatRaster, data.frame matrix. terra::SpatRaster data.frame, numeric variables considered.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_collinear.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filter to retain only variables that have low collinearity — filter_collinear","text":"","code":"filter_collinear( x, cutoff = NULL, verbose = FALSE, names = TRUE, to_keep = NULL, method = \"cor_caret\", cor_type = \"pearson\", max_cells = Inf, ... ) # Default S3 method filter_collinear( x, cutoff = NULL, verbose = FALSE, names = TRUE, to_keep = NULL, method = \"cor_caret\", cor_type = \"pearson\", max_cells = Inf, ... ) # S3 method for class 'SpatRaster' filter_collinear( x, cutoff = NULL, verbose = FALSE, names = TRUE, to_keep = NULL, method = \"cor_caret\", cor_type = \"pearson\", max_cells = Inf, exhaustive = FALSE, ... ) # S3 method for class 'data.frame' filter_collinear( x, cutoff = NULL, verbose = FALSE, names = TRUE, to_keep = NULL, method = \"cor_caret\", cor_type = \"pearson\", max_cells = Inf, ... ) # S3 method for class 'matrix' filter_collinear( x, cutoff = NULL, verbose = FALSE, names = TRUE, to_keep = NULL, method = \"cor_caret\", cor_type = \"pearson\", max_cells = Inf, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_collinear.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filter to retain only variables that have low collinearity — filter_collinear","text":"x terra::SpatRaster object, data.frame (numeric variables) cutoff numeric value used threshold remove variables. , \"cor_caret\" \"vif_cor\", pair-wise absolute correlation cutoff, defaults 0.7. 
\"vif_step\", variable inflation factor, defaults 10 verbose boolean whether additional information provided screen names logical; column names returned TRUE column index FALSE)? to_keep vector variable names want force set (note function return error correlation among variables higher cutoff). method character. One \"cor_caret\", \"vif_cor\" \"vif_step\". cor_type character. methods use correlation, type correlation: \"pearson\", \"kendall\", \"spearman\". Defaults \"pearson\" max_cells positive integer. maximum number cells used. smaller ncell(x), regular sample x used ... additional arguments specific given object type exhaustive boolean. Used terra::SpatRaster downsampling max_cells, require exhaustive approach terra::spatSample(). needed rasters sparse large, see help page terra::spatSample() details.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_collinear.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Filter to retain only variables that have low collinearity — filter_collinear","text":"vector names columns correlation threshold (names = TRUE), otherwise vector indices. Note indices numeric variables (.e. factors present, indices take account).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_collinear.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Filter to retain only variables that have low collinearity — filter_collinear","text":"Naimi, B., Hamm, N..S., Groen, T.., Skidmore, .K., Toxopeus, .G. 2014. positional uncertainty problem species distribution modelling?, Ecography 37 (2): 191-203.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_collinear.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Filter to retain only variables that have low collinearity — filter_collinear","text":"cor_caret: Original R code Dong Li, modified Max Kuhn Andrea Manica; vif_step vif_cor, original algorithm Babak Naimi, rewritten Andrea Manica tidysdm","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_high_cor.html","id":null,"dir":"Reference","previous_headings":"","what":"Deprecated: Filter to retain only variables below a given correlation threshold — filter_high_cor","title":"Deprecated: Filter to retain only variables below a given correlation threshold — filter_high_cor","text":"FUNCTION DEPRECATED. 
USE filter_collinear method=cor_caret instead","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_high_cor.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Deprecated: Filter to retain only variables below a given correlation threshold — filter_high_cor","text":"","code":"filter_high_cor(x, cutoff = 0.7, verbose = FALSE, names = TRUE, to_keep = NULL) # Default S3 method filter_high_cor(x, cutoff = 0.7, verbose = FALSE, names = TRUE, to_keep = NULL) # S3 method for class 'SpatRaster' filter_high_cor(x, cutoff = 0.7, verbose = FALSE, names = TRUE, to_keep = NULL) # S3 method for class 'data.frame' filter_high_cor(x, cutoff = 0.7, verbose = FALSE, names = TRUE, to_keep = NULL) # S3 method for class 'matrix' filter_high_cor(x, cutoff = 0.7, verbose = FALSE, names = TRUE, to_keep = NULL)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_high_cor.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Deprecated: Filter to retain only variables below a given correlation threshold — filter_high_cor","text":"x terra::SpatRaster object, data.frame (numeric variables), correlation matrix cutoff numeric value pair-wise absolute correlation cutoff verbose boolean printing details names logical; column names returned TRUE column index FALSE)? to_keep vector variable names want force set (note function return error correlation among variables higher cutoff).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_high_cor.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Deprecated: Filter to retain only variables below a given correlation threshold — filter_high_cor","text":"vector names columns correlation threshold (names = TRUE), otherwise vector indices. Note indices numeric variables (.e. factors present, indices take account).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/filter_high_cor.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Deprecated: Filter to retain only variables below a given correlation threshold — filter_high_cor","text":"method finds subset variable correlation certain cutoff. methods terra::SpatRaster, data.frame, work directly correlation matrix previously estimated. data.frame, numeric variables considered. algorithm based caret::findCorrelation, using exact option. absolute values pair-wise correlations considered. two variables high correlation, function looks mean absolute correlation variable removes variable largest mean absolute correlation. 
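As the deprecation notice above indicates, a minimal sketch (hypothetical numeric data) of the recommended replacement:

library(tidysdm)
set.seed(123)
# hypothetical predictors: b is nearly collinear with a, c is independent
preds <- data.frame(a = rnorm(100), c = rnorm(100))
preds$b <- preds$a + rnorm(100, sd = 0.1)
filter_collinear(preds, cutoff = 0.7, method = "cor_caret")
# returns the names of the variables to retain (one of a/b is dropped)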
several function package subselect can also used accomplish goal tend retain predictors.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/form_resp.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the response variable from a formula — form_resp","title":"Get the response variable from a formula — form_resp","text":"counterpart rsample::form_pred.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/form_resp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the response variable from a formula — form_resp","text":"","code":"form_resp(x)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/form_resp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get the response variable from a formula — form_resp","text":"x formula","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/form_resp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get the response variable from a formula — form_resp","text":"character name response","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/form_resp.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Get the response variable from a formula — form_resp","text":"Note: might behave well functions log(y). neither form_pred modified https://stackoverflow.com/questions/13217322/--reliably-get-dependent-variable-name--formula-object","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/gam_formula.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a formula for gam — gam_formula","title":"Create a formula for gam — gam_formula","text":"function takes formula recipe, turns numeric predictors smooths given k. formula can passed workflow workflow set fitting gam.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/gam_formula.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a formula for gam — gam_formula","text":"","code":"gam_formula(object, k = 10)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/gam_formula.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a formula for gam — gam_formula","text":"object recipes::recipe, already trained k k value smooth","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/gam_formula.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a formula for gam — gam_formula","text":"formula","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/geom_split_violin.html","id":null,"dir":"Reference","previous_headings":"","what":"Split violin geometry for ggplots — geom_split_violin","title":"Split violin geometry for ggplots — geom_split_violin","text":"geometry displays density distribution two groups side side, two halves violin. 
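Returning to gam_formula() above, a minimal sketch (hypothetical toy data; note the recipe must already be trained, i.e. prepped):

library(tidysdm)
library(recipes)
set.seed(123)
toy <- data.frame(
  class = factor(rep(c("presence", "pseudoabs"), each = 10),
    levels = c("presence", "pseudoabs")
  ),
  bio01 = rnorm(20),
  bio12 = rnorm(20)
)
rec <- prep(recipe(class ~ ., data = toy), training = toy)
gam_formula(rec, k = 5)
# numeric predictors become smooths, e.g. class ~ s(bio01, k = 5) + s(bio12, k = 5)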
Note emptyx aesthetic provided even want plot single variable (see example ).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/geom_split_violin.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Split violin geometry for ggplots — geom_split_violin","text":"","code":"geom_split_violin( mapping = NULL, data = NULL, stat = \"ydensity\", position = \"identity\", nudge = 0, ..., draw_quantiles = NULL, trim = TRUE, scale = \"area\", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/geom_split_violin.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Split violin geometry for ggplots — geom_split_violin","text":"mapping Set aesthetic mappings created aes(). specified inherit.aes = TRUE (default), combined default mapping top level plot. must supply mapping plot mapping. data data displayed layer. three options: NULL, default, data inherited plot data specified call ggplot(). data.frame, object, override plot data. objects fortified produce data frame. See fortify() variables created. function called single argument, plot data. return value must data.frame, used layer data. function can created formula (e.g. ~ head(.x, 10)). stat Use override default connection ggplot2::geom_violin() ggplot2::stat_ydensity(). position position adjustment use data layer. can used various ways, including prevent overplotting improving display. position argument accepts following: result calling position function, position_jitter(). method allows passing extra arguments position. string naming position adjustment. give position string, strip function name position_ prefix. example, use position_jitter(), give position \"jitter\". information ways specify position, see layer position documentation. nudge Add space half-violin middle space allotted given factor x-axis. ... arguments passed layer()'s params argument. arguments broadly fall one 4 categories . Notably, arguments position argument, aesthetics required can passed .... Unknown arguments part 4 categories ignored. Static aesthetics mapped scale, fixed value apply layer whole. example, colour = \"red\" linewidth = 3. geom's documentation Aesthetics section lists available options. 'required' aesthetics passed params. Please note passing unmapped aesthetics vectors technically possible, order required length guaranteed parallel input data. constructing layer using stat_*() function, ... argument can used pass parameters geom part layer. example stat_density(geom = \"area\", outline.type = \"\"). geom's documentation lists parameters can accept. Inversely, constructing layer using geom_*() function, ... argument can used pass parameters stat part layer. example geom_area(stat = \"density\", adjust = 0.5). stat's documentation lists parameters can accept. key_glyph argument layer() may also passed .... can one functions described key glyphs, change display layer legend. draw_quantiles (NULL) (default), draw horizontal lines given quantiles density estimate. trim TRUE (default), trim tails violins range data. FALSE, trim tails. scale \"area\" (default), violins area (trimming tails). \"count\", areas scaled proportionally number observations. \"width\", violins maximum width. na.rm FALSE, default, missing values removed warning. TRUE, missing values silently removed. show.legend logical. layer included legends? NA, default, includes aesthetics mapped. 
FALSE never includes, TRUE always includes. can also named logical vector finely select aesthetics display. inherit.aes FALSE, overrides default aesthetics, rather combining . useful helper functions define data aesthetics inherit behaviour default plot specification, e.g. borders().","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/geom_split_violin.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Split violin geometry for ggplots — geom_split_violin","text":"ggplot2::layer object","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/geom_split_violin.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Split violin geometry for ggplots — geom_split_violin","text":"implementation based https://stackoverflow.com/questions/35717353/split-violin-plot--ggplot2. Credit goes @jan-jlx providing complete implementation StackOverflow, Trang Q. Nguyen adding nudge parameter.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/geom_split_violin.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Split violin geometry for ggplots — geom_split_violin","text":"","code":"data(\"bradypus\", package = \"maxnet\") bradypus_tb <- tibble::as_tibble(bradypus) %>% dplyr::mutate(presence = relevel( factor( dplyr::case_match(presence, 1 ~ \"presence\", 0 ~ \"absence\") ), ref = \"presence\" )) ggplot(bradypus_tb, aes( x = \"\", y = cld6190_ann, fill = presence )) + geom_split_violin(nudge = 0.01)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_cellsize.html","id":null,"dir":"Reference","previous_headings":"","what":"Get default grid cellsize for a given dataset — grid_cellsize","title":"Get default grid cellsize for a given dataset — grid_cellsize","text":"function facilitates using spatialsample::spatial_block_cv multiple times analysis. spatialsample::spatial_block_cv creates grid based object data. However, spatial blocks generated multiple times analysis (e.g. spatial_initial_split(), subsequently cross-validation training dataset), might desirable keep grid). applying function largest dataset, usually full dataset spatial_initial_split(). 
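A minimal sketch (reusing the lacerta sample data loaded elsewhere on this page; passing cellsize and offset through to the block-building step is an assumption based on the vignette usage) of computing the grid once and reusing it, so that the initial split and the subsequent cross-validation share the same blocks:

library(tidysdm)
lacerta_thin <- readRDS(system.file("extdata/lacerta_climate_sf.RDS",
  package = "tidysdm"
))
# size the grid on the largest (full) dataset...
cell <- grid_cellsize(lacerta_thin)
offset <- grid_offset(lacerta_thin)
# ...then pass it to both splitting steps
lacerta_initial <- spatial_initial_split(lacerta_thin,
  prop = 1 / 5, spatial_block_cv,
  cellsize = cell, offset = offset
)
lacerta_cv <- spatialsample::spatial_block_cv(
  rsample::training(lacerta_initial),
  v = 5, cellsize = cell, offset = offset
)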
resulting cellsize can used option spatialsample::spatial_block_cv.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_cellsize.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get default grid cellsize for a given dataset — grid_cellsize","text":"","code":"grid_cellsize(data, n = c(10, 10))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_cellsize.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get default grid cellsize for a given dataset — grid_cellsize","text":"data sf::sf dataset used size grid n number cells grid, defaults c(10,10), also default sf::st_make_grid()","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_cellsize.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get default grid cellsize for a given dataset — grid_cellsize","text":"cell size","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_offset.html","id":null,"dir":"Reference","previous_headings":"","what":"Get default grid cellsize for a given dataset — grid_offset","title":"Get default grid cellsize for a given dataset — grid_offset","text":"function facilitates using spatialsample::spatial_block_cv multiple times analysis. spatialsample::spatial_block_cv creates grid based object data. However, spatial blocks generated multiple times analysis (e.g. spatial_initial_split(), subsequently cross-validation training dataset), might desirable keep grid). applying function largest dataset, usually full dataset spatial_initial_split(). resulting cellsize can used option spatialsample::spatial_block_cv.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_offset.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get default grid cellsize for a given dataset — grid_offset","text":"","code":"grid_offset(data)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_offset.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get default grid cellsize for a given dataset — grid_offset","text":"data sf::sf dataset used size grid","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/grid_offset.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get default grid cellsize for a given dataset — grid_offset","text":"grid offset","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/horses.html","id":null,"dir":"Reference","previous_headings":"","what":"Coordinates of radiocarbon dates for horses — horses","title":"Coordinates of radiocarbon dates for horses — horses","text":"Coordinates presences horses 22k 8k YBP.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/horses.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Coordinates of radiocarbon dates for horses — horses","text":"","code":"horses"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/horses.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Coordinates of radiocarbon dates for horses — horses","text":"tibble 1,297 rows 3 variables: latitude latitudes degrees longitude longitudes degrees time_bp time years 
present","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Maximum Cohen's Kappa — kap_max","title":"Maximum Cohen's Kappa — kap_max","text":"Cohen's Kappa (yardstick::kap()) measure similar yardstick::accuracy(), normalises observed accuracy value expected chance (helps unbalanced cases one class predominant).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Maximum Cohen's Kappa — kap_max","text":"","code":"kap_max(data, ...) # S3 method for class 'data.frame' kap_max( data, truth, ..., estimator = NULL, na_rm = TRUE, event_level = \"first\", case_weights = NULL ) # S3 method for class 'sf' kap_max(data, ...) kap_max_vec( truth, estimate, estimator = NULL, na_rm = TRUE, event_level = \"first\", case_weights = NULL, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Maximum Cohen's Kappa — kap_max","text":"data Either data.frame containing columns specified truth estimate arguments, table/matrix true class results columns table. ... set unquoted column names one dplyr selector functions choose variables contain class probabilities. truth binary, 1 column selected, correspond value event_level. Otherwise, many columns factor levels truth ordering columns factor levels truth. truth column identifier true class results (factor). unquoted column name although argument passed expression supports quasiquotation (can unquote column names). _vec() functions, factor vector. estimator One \"binary\", \"hand_till\", \"macro\", \"macro_weighted\" specify type averaging done. \"binary\" relevant two class case. others general methods calculating multiclass metrics. default automatically choose \"binary\" truth binary, \"hand_till\" truth >2 levels case_weights specified, \"macro\" truth >2 levels case_weights specified (case \"hand_till\" well-defined). na_rm logical value indicating whether NA values stripped computation proceeds. event_level single string. Either \"first\" \"second\" specify level truth consider \"event\". argument applicable estimator = \"binary\". default uses internal helper generally defaults \"first\" case_weights optional column identifier case weights. unquoted column name evaluates numeric column data. _vec() functions, numeric vector. estimate truth binary, numeric vector class probabilities corresponding \"relevant\" class. Otherwise, matrix many columns factor levels truth. assumed order levels truth.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Maximum Cohen's Kappa — kap_max","text":"tibble columns .metric, .estimator, .estimate 1 row values. grouped data frames, number rows returned number groups.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Maximum Cohen's Kappa — kap_max","text":"function calibrates probability threshold classify presences maximises kappa. multiclass version function, operates binary predictions (e.g. 
presences absences SDMs).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Maximum Cohen's Kappa — kap_max","text":"Cohen, J. (1960). \"coefficient agreement nominal scales\". Educational Psychological Measurement. 20 (1): 37-46. Cohen, J. (1968). \"Weighted kappa: Nominal scale agreement provision scaled disagreement partial credit\". Psychological Bulletin. 70 (4): 213-220.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/kap_max.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Maximum Cohen's Kappa — kap_max","text":"","code":"kap_max(two_class_example, truth, Class1) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 kap_max binary 0.725"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/km2m.html","id":null,"dir":"Reference","previous_headings":"","what":"Convert a geographic distance from km to m — km2m","title":"Convert a geographic distance from km to m — km2m","text":"function takes distance km converts meters, units generally used geographic operations R. trivial conversion, function ensures no zeroes lost along way!","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/km2m.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Convert a geographic distance from km to m — km2m","text":"","code":"km2m(x)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/km2m.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Convert a geographic distance from km to m — km2m","text":"x number km","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/km2m.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Convert a geographic distance from km to m — km2m","text":"number meters","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/km2m.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Convert a geographic distance from km to m — km2m","text":"","code":"km2m(10000) #> [1] 1e+07 km2m(1) #> [1] 1000"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta.html","id":null,"dir":"Reference","previous_headings":"","what":"Coordinates of presences for Iberian emerald lizard — lacerta","title":"Coordinates of presences for Iberian emerald lizard — lacerta","text":"Coordinates presences Lacerta schreiberi. 
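(Usage sketch: dataset ships with the package, so it can be inspected directly, e.g. head(tidysdm::lacerta).) 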
variables follows:","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Coordinates of presences for Iberian emerald lizard — lacerta","text":"","code":"lacerta"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Coordinates of presences for Iberian emerald lizard — lacerta","text":"tibble 1,297 rows 3 variables: ID ids GBIF latitude latitudes degrees longitude longitudes degrees","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"A simple ensemble for the lacerta data — lacerta_ensemble","title":"A simple ensemble for the lacerta data — lacerta_ensemble","text":"Ensemble SDM Lacerta schreiberi, generated vignette.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A simple ensemble for the lacerta data — lacerta_ensemble","text":"","code":"lacerta_ensemble"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta_ensemble.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A simple ensemble for the lacerta data — lacerta_ensemble","text":"simple_ensemble object","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta_rep_ens.html","id":null,"dir":"Reference","previous_headings":"","what":"A repeat ensemble for the lacerta data — lacerta_rep_ens","title":"A repeat ensemble for the lacerta data — lacerta_rep_ens","text":"Ensemble SDM Lacerta schreiberi, generated vignette.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta_rep_ens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A repeat ensemble for the lacerta data — lacerta_rep_ens","text":"","code":"lacerta_rep_ens"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacerta_rep_ens.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A repeat ensemble for the lacerta data — lacerta_rep_ens","text":"repeat_ensemble object","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacertidae_background.html","id":null,"dir":"Reference","previous_headings":"","what":"Coordinates of presences for lacertidae in the Iberian peninsula — lacertidae_background","title":"Coordinates of presences for lacertidae in the Iberian peninsula — lacertidae_background","text":"Coordinates presences lacertidae, used background lacerta dataset. 
variables follows:","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacertidae_background.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Coordinates of presences for lacertidae in the Iberian peninsula — lacertidae_background","text":"","code":"lacertidae_background"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/lacertidae_background.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Coordinates of presences for lacertidae in the Iberian peninsula — lacertidae_background","text":"tibble 1,297 rows 3 variables: ID ids GBIF latitude latitudes degrees longitude longitudes degrees","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/make_mask_from_presence.html","id":null,"dir":"Reference","previous_headings":"","what":"Make a mask from presence data — make_mask_from_presence","title":"Make a mask from presence data — make_mask_from_presence","text":"function uses presence column create mask apply raster define area interest. Two methods available: one uses buffer around presence, one creates convex hull around presences (possibility adding buffer around hull).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/make_mask_from_presence.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Make a mask from presence data — make_mask_from_presence","text":"","code":"make_mask_from_presence(data, method = \"buffer\", buffer = 0, return_sf = FALSE)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/make_mask_from_presence.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Make a mask from presence data — make_mask_from_presence","text":"data sf::sf data frame presences. method method use create mask. 
Either 'buffer' 'convex_hull' buffer buffer add around presence (units crs data; lat/lon, buffer meters), around convex hull (method 'convex_hull') return_sf whether return mask sf object (TRUE) terra::SpatVector object (FALSE, default)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/make_mask_from_presence.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Make a mask from presence data — make_mask_from_presence","text":"terra::SpatVector sf object (depending value return_sf) mask","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/make_mask_from_presence.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Make a mask from presence data — make_mask_from_presence","text":"use terra::mask() raster, use return_sf = FALSE get terra::SpatVector object can used masking.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/make_mask_from_presence.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Make a mask from presence data — make_mask_from_presence","text":"","code":"lacerta_sf <- lacerta %>% sf::st_as_sf(coords = c(\"longitude\", \"latitude\")) %>% sf::st_set_crs(4326) land_mask <- terra::readRDS(system.file(\"extdata/lacerta_land_mask.rds\", package = \"tidysdm\")) mask_buffer <- make_mask_from_presence(lacerta_sf, method = \"buffer\", buffer = 60000) terra::plot(terra::mask(land_mask, mask_buffer)) mask_ch <- make_mask_from_presence(lacerta_sf, method = \"convex_hull\") terra::plot(terra::mask(land_mask, mask_ch))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent.html","id":null,"dir":"Reference","previous_headings":"","what":"MaxEnt model — maxent","title":"MaxEnt model — maxent","text":"maxent defines MaxEnt model used Species Distribution Models. good guide options MaxEnt model work can found https://onlinelibrary.wiley.com/doi/full/10.1111/j.1600-0587.2013.07872.x","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"MaxEnt model — maxent","text":"","code":"maxent( mode = \"classification\", engine = \"maxnet\", feature_classes = NULL, regularization_multiplier = NULL )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"MaxEnt model — maxent","text":"mode single character string type model. possible value model \"classification\". engine single character string specifying computational engine use fitting. Currently \"maxnet\" available. 
feature_classes character, continuous feature classes desired, either \"default\" subset \"lqpht\" (example, \"lh\") regularization_multiplier numeric, constant adjust regularization","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"MaxEnt model — maxent","text":"parsnip::model_spec maxent model","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"MaxEnt model — maxent","text":"","code":"# \\donttest{ # format the data data(\"bradypus\", package = \"maxnet\") bradypus_tb <- tibble::as_tibble(bradypus) %>% dplyr::mutate(presence = relevel( factor( dplyr::case_match(presence, 1 ~ \"presence\", 0 ~ \"absence\") ), ref = \"presence\" )) %>% select(-ecoreg) # fit the model, and make some predictions maxent_spec <- maxent(feature_classes = \"lq\") maxent_fitted <- maxent_spec %>% fit(presence ~ ., data = bradypus_tb) pred_prob <- predict(maxent_fitted, new_data = bradypus[, -1], type = \"prob\") pred_class <- predict(maxent_fitted, new_data = bradypus[, -1], type = \"class\") # Now with tuning maxent_spec <- maxent( regularization_multiplier = tune(), feature_classes = tune() ) set.seed(452) cv <- vfold_cv(bradypus_tb, v = 2) maxent_tune_res <- maxent_spec %>% tune_grid(presence ~ ., cv, grid = 3) show_best(maxent_tune_res, metric = \"roc_auc\") #> # A tibble: 3 × 8 #> feature_classes regularization_multip…¹ .metric .estimator mean n std_err #> #> 1 l 1.02 roc_auc binary 0.857 2 0.0143 #> 2 lqph 1.90 roc_auc binary 0.856 2 0.0121 #> 3 lqph 2.50 roc_auc binary 0.854 2 0.0123 #> # ℹ abbreviated name: ¹​regularization_multiplier #> # ℹ 1 more variable: .config # }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent_params.html","id":null,"dir":"Reference","previous_headings":"","what":"Parameters for maxent models — maxent_params","title":"Parameters for maxent models — maxent_params","text":"parameters auxiliary MaxEnt models using \"maxnet\" engine. functions used tuning functions, user rarely access directly.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent_params.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Parameters for maxent models — maxent_params","text":"","code":"regularization_multiplier(range = c(0.5, 3), trans = NULL) feature_classes(values = c(\"l\", \"lq\", \"lqp\", \"lqph\", \"lqpht\"))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent_params.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Parameters for maxent models — maxent_params","text":"range two-element vector holding defaults smallest largest possible values, respectively. transformation specified, values transformed units. trans trans object scales package, scales::log10_trans() scales::reciprocal_trans(). not provided, default used matches units used range. no transformation, NULL. 
values feature_classes(), character string subset \"lqpht\" (example, \"lh\")","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent_params.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Parameters for maxent models — maxent_params","text":"param object can used tuning.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxent_params.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Parameters for maxent models — maxent_params","text":"","code":"regularization_multiplier() #> Reg. multiplier (quantitative) #> Range: [0.5, 3] feature_classes() #> Feature classes (qualitative) #> 5 possible values include: #> 'l', 'lq', 'lqp', 'lqph' and 'lqpht'"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_fit.html","id":null,"dir":"Reference","previous_headings":"","what":"Wrapper to fit maxnet models with formulae — maxnet_fit","title":"Wrapper to fit maxnet models with formulae — maxnet_fit","text":"function wrapper around maxnet::maxnet, takes formula data well exposing parameters normalisation manner compatible parsnip. Users unlikely use function directly. parsnip model specification MaxEnt, see maxent().","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_fit.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Wrapper to fit maxnet models with formulae — maxnet_fit","text":"","code":"maxnet_fit( formula, data, regmult = 1, classes = \"default\", regfun = maxnet::maxnet.default.regularization, addsamplestobackground = TRUE, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_fit.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Wrapper to fit maxnet models with formulae — maxnet_fit","text":"formula formula defining outcome predictors data data.frame outcomes predictors regmult numeric, constant adjust regularization classes character, continuous feature classes desired, either \"default\" subset \"lqpht\" (example, \"lh\") regfun function, computes regularization constant feature addsamplestobackground logical, TRUE add background presence sample not already there ... currently used.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_fit.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Wrapper to fit maxnet models with formulae — maxnet_fit","text":"Maxnet returns object class maxnet, list consisting glmnet model following elements added: betas nonzero coefficients fitted model alpha constant offset making exponential model sum one background data entropy entropy exponential model penalty.factor regularization constants used feature featuremins minimum feature, used clamping featuremaxs maximum feature, used clamping varmin minimum predictor, used clamping varmax maximum predictor, used clamping samplemeans mean predictor samples (majority factors) levels levels predictor factor","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_fit.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Wrapper to fit maxnet models with formulae — maxnet_fit","text":"response needs factor class representing presences reference level factor (expected classification models). 
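(Minimal releveling sketch, with x a hypothetical 0/1 numeric vector: y <- relevel(factor(ifelse(x == 1, \"presence\", \"absence\")), ref = \"presence\").) 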
good guide options Maxent model work can found https://onlinelibrary.wiley.com/doi/full/10.1111/j.1600-0587.2013.07872.x","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_fit.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Wrapper to fit maxnet models with formulae — maxnet_fit","text":"","code":"# \\donttest{ # we repeat the example in the `maxnet` package data(\"bradypus\", package = \"maxnet\") bradypus_tb <- tibble::as_tibble(bradypus) %>% dplyr::mutate(presence = relevel( factor( dplyr::case_match(presence, 1 ~ \"presence\", 0 ~ \"absence\") ), ref = \"presence\" )) mod <- maxnet_fit(presence ~ ., data = bradypus_tb, classes = \"lq\") plot(mod, \"tmp6190_ann\") # }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_predict.html","id":null,"dir":"Reference","previous_headings":"","what":"Wrapper to predict maxnet models — maxnet_predict","title":"Wrapper to predict maxnet models — maxnet_predict","text":"function wrapper around predict method maxnet::maxnet, making function compatible parsnip. Users unlikely use function directly. parsnip model specification MaxEnt, see maxent().","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_predict.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Wrapper to predict maxnet models — maxnet_predict","text":"","code":"maxnet_predict( object, newdata, type = c(\"class\", \"prob\"), maxnet_type = c(\"cloglog\", \"link\", \"exponential\", \"logistic\"), clamp = TRUE )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_predict.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Wrapper to predict maxnet models — maxnet_predict","text":"object maxnet::maxnet object newdata dataframe new data type either \"prob\" \"class\" maxnet_type transformation used prediction clamp logical, defining whether clamping observed ranges used","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/maxnet_predict.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Wrapper to predict maxnet models — maxnet_predict","text":"tibble predictions","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/niche_overlap.html","id":null,"dir":"Reference","previous_headings":"","what":"Compute overlap metrics of the two niches — niche_overlap","title":"Compute overlap metrics of the two niches — niche_overlap","text":"function computes overlap metrics two rasters. 
currently implements Schoener's D, I (inverse Hellinger's distance).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/niche_overlap.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compute overlap metrics of the two niches — niche_overlap","text":"","code":"niche_overlap(x, y, method = c(\"Schoener\", \"Hellinger\"))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/niche_overlap.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compute overlap metrics of the two niches — niche_overlap","text":"x terra::SpatRaster single layer y terra::SpatRaster single layer method string (vector strings) taking values \"Schoener\" \"Hellinger\"","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/niche_overlap.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compute overlap metrics of the two niches — niche_overlap","text":"list overlap metrics, slots D, I (depending method)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/niche_overlap.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Compute overlap metrics of the two niches — niche_overlap","text":"Note Hellinger's distance normalised dividing square root 2 (correct asymptote Hellinger's D), rather incorrect 2 used originally Warren et al (2008), based Erratum paper.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/niche_overlap.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Compute overlap metrics of the two niches — niche_overlap","text":"Warren, D.L., Glor, R.E. & Turelli M. (2008) Environmental niche equivalency versus conservativism: quantitative approaches niche evolution. Evolution 62: 2868-2883","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh.html","id":null,"dir":"Reference","previous_headings":"","what":"Find threshold that optimises a given metric — optim_thresh","title":"Find threshold that optimises a given metric — optim_thresh","text":"function returns threshold turn probabilities binary classes whilst optimising given metric. Currently available tss_max, kap_max sensitivity (target sensitivity required).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Find threshold that optimises a given metric — optim_thresh","text":"","code":"optim_thresh(truth, estimate, metric, event_level = \"first\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Find threshold that optimises a given metric — optim_thresh","text":"truth column identifier true class results (factor). unquoted column name although argument passed expression supports quasiquotation (can unquote column names). _vec() functions, factor vector. estimate predicted probability event metric character metric optimised. Currently \"tss_max\", \"kap_max\", \"sensitivity\" given target (e.g. c(\"sensitivity\",0.8)) event_level single string. Either \"first\" \"second\" specify level truth consider \"event\". argument applicable estimator = \"binary\". 
default uses internal helper generally defaults \"first\"","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Find threshold that optimises a given metric — optim_thresh","text":"probability threshold event","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Find threshold that optimises a given metric — optim_thresh","text":"","code":"optim_thresh(two_class_example$truth, two_class_example$Class1, metric = c(\"tss_max\")) #> [1] 0.7544818 optim_thresh(two_class_example$truth, two_class_example$Class1, metric = c(\"sens\", 0.9)) #> [1] 0.3710924"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_kap_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Find threshold that maximises Kappa — optim_thresh_kap_max","title":"Find threshold that maximises Kappa — optim_thresh_kap_max","text":"internal function returns threshold turn probabilities binary classes maximise kappa","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_kap_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Find threshold that maximises Kappa — optim_thresh_kap_max","text":"","code":"optim_thresh_kap_max(presences, absences)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_kap_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Find threshold that maximises Kappa — optim_thresh_kap_max","text":"presences Probabilities presences. absences Probabilities absences","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_kap_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Find threshold that maximises Kappa — optim_thresh_kap_max","text":"probability threshold event","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_sens.html","id":null,"dir":"Reference","previous_headings":"","what":"Find threshold that gives a target sensitivity — optim_thresh_sens","title":"Find threshold that gives a target sensitivity — optim_thresh_sens","text":"internal function returns threshold turn probabilities binary classes given target sensitivity","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_sens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Find threshold that gives a target sensitivity — optim_thresh_sens","text":"","code":"optim_thresh_sens(presences, absences, sens_target)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_sens.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Find threshold that gives a target sensitivity — optim_thresh_sens","text":"presences Probabilities presences. 
absences Probabilities absences sens_target target sensitivity","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_sens.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Find threshold that gives a target sensitivity — optim_thresh_sens","text":"probability threshold event","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_tss_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Find threshold that maximises TSS — optim_thresh_tss_max","title":"Find threshold that maximises TSS — optim_thresh_tss_max","text":"internal function returns threshold turn probabilities binary classes maximise TSS","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_tss_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Find threshold that maximises TSS — optim_thresh_tss_max","text":"","code":"optim_thresh_tss_max(presences, absences)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_tss_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Find threshold that maximises TSS — optim_thresh_tss_max","text":"presences Probabilities presences. absences Probabilities absences","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/optim_thresh_tss_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Find threshold that maximises TSS — optim_thresh_tss_max","text":"probability threshold event","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/out_of_range_warning.html","id":null,"dir":"Reference","previous_headings":"","what":"Warn if some times are outside the range of time steps from a raster — out_of_range_warning","title":"Warn if some times are outside the range of time steps from a raster — out_of_range_warning","text":"function helps making sure, assign times time_step layers raster, values not badly out of range","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/out_of_range_warning.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Warn if some times are outside the range of time steps from a raster — out_of_range_warning","text":"","code":"out_of_range_warning(times, time_steps)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/out_of_range_warning.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Warn if some times are outside the range of time steps from a raster — out_of_range_warning","text":"times times locations time_steps time steps raster","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/out_of_range_warning.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Warn if some times are outside the range of time steps from a raster — out_of_range_warning","text":"NULL return","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/pipe.html","id":null,"dir":"Reference","previous_headings":"","what":"Pipe operator — %>%","title":"Pipe operator — %>%","text":"See magrittr::%>% details.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/pipe.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Pipe operator — %>%","text":"","code":"lhs %>% 
rhs"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/pipe.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Pipe operator — %>%","text":"lhs value magrittr placeholder. rhs function call using magrittr semantics.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/pipe.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Pipe operator — %>%","text":"result calling rhs(lhs).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/plot_pres_vs_bg.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot presences vs background — plot_pres_vs_bg","title":"Plot presences vs background — plot_pres_vs_bg","text":"Create composite plots contrasting distribution multiple variables presences vs background.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/plot_pres_vs_bg.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot presences vs background — plot_pres_vs_bg","text":"","code":"plot_pres_vs_bg(.data, .col)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/plot_pres_vs_bg.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot presences vs background — plot_pres_vs_bg","text":".data data.frame (derived object, tibble::tibble, sf::st_sf) values bioclimate variables presences background .col column containing presences; assumes presences first level factor","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/plot_pres_vs_bg.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Plot presences vs background — plot_pres_vs_bg","text":"patchwork composite plot","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/plot_pres_vs_bg.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot presences vs background — plot_pres_vs_bg","text":"","code":"# \\donttest{ data(\"bradypus\", package = \"maxnet\") bradypus_tb <- tibble::as_tibble(bradypus) %>% dplyr::mutate(presence = relevel( factor( dplyr::case_match(presence, 1 ~ \"presence\", 0 ~ \"absence\") ), ref = \"presence\" )) %>% select(-ecoreg) bradypus_tb %>% plot_pres_vs_bg(presence) # }"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.repeat_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Predict for a repeat ensemble set — predict.repeat_ensemble","title":"Predict for a repeat ensemble set — predict.repeat_ensemble","text":"Predict new dataset using repeat ensemble. Predictions individual models combined according fun","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.repeat_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Predict for a repeat ensemble set — predict.repeat_ensemble","text":"","code":"# S3 method for class 'repeat_ensemble' predict( object, new_data, type = \"prob\", fun = \"mean\", metric_thresh = NULL, class_thresh = NULL, members = FALSE, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.repeat_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Predict for a repeat ensemble set — predict.repeat_ensemble","text":"object repeat_ensemble object new_data data frame look variables predict. type type prediction, \"prob\" \"class\". 
fun string defining aggregating function. can take values mean, median, weighted_mean, weighted_median none. possible combine multiple functions, except \"none\". set \"none\", individual member predictions returned (automatically sets members TRUE) metric_thresh vector length 2 giving metric threshold, used prune models ensemble used prediction. 'metrics' need computed workflow tuned. Examples c(\"accuracy\",0.8) c(\"boyce_cont\",0.7) class_thresh probability threshold used convert probabilities classes. can number (0 1), character metric (currently \"tss_max\" \"sensitivity\"). sensitivity, additional target value passed along second element vector, e.g. c(\"sensitivity\",0.8). members boolean defining whether individual predictions member added ensemble prediction. columns individual members name workflow prefix, separated \".\" usual column names predictions. ... not used method.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.repeat_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Predict for a repeat ensemble set — predict.repeat_ensemble","text":"tibble predictions","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.simple_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Predict for a simple ensemble set — predict.simple_ensemble","title":"Predict for a simple ensemble set — predict.simple_ensemble","text":"Predict new dataset using simple ensemble. Predictions individual models (i.e. workflows) combined according fun","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.simple_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Predict for a simple ensemble set — predict.simple_ensemble","text":"","code":"# S3 method for class 'simple_ensemble' predict( object, new_data, type = \"prob\", fun = \"mean\", metric_thresh = NULL, class_thresh = NULL, members = FALSE, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.simple_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Predict for a simple ensemble set — predict.simple_ensemble","text":"object simple_ensemble object new_data data frame look variables predict. type type prediction, \"prob\" \"class\". fun string defining aggregating function. can take values mean, median, weighted_mean, weighted_median none. possible combine multiple functions, except \"none\". set \"none\", individual member predictions returned (automatically sets members TRUE) metric_thresh vector length 2 giving metric threshold, used prune models ensemble used prediction. 'metrics' need computed workflow tuned. Examples c(\"accuracy\",0.8) c(\"boyce_cont\",0.7) class_thresh probability threshold used convert probabilities classes. can number (0 1), character metric (currently \"tss_max\" \"sensitivity\"). sensitivity, additional target value passed along second element vector, e.g. c(\"sensitivity\",0.8). members boolean defining whether individual predictions member added ensemble prediction. columns individual members name workflow prefix, separated \".\" usual column names predictions. ... 
not used method.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict.simple_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Predict for a simple ensemble set — predict.simple_ensemble","text":"tibble predictions","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict_raster.html","id":null,"dir":"Reference","previous_headings":"","what":"Make predictions for a whole raster — predict_raster","title":"Make predictions for a whole raster — predict_raster","text":"function allows use raster data make predictions variety tidymodels objects, simple_ensemble stacks::stacks","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict_raster.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Make predictions for a whole raster — predict_raster","text":"","code":"predict_raster(object, raster, ...) # Default S3 method predict_raster(object, raster, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict_raster.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Make predictions for a whole raster — predict_raster","text":"object tidymodels object interest raster terra::SpatRaster input data. include levels names variables used object ... parameters passed standard predict() function appropriate object type (e.g. metric_thresh class_thresh).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/predict_raster.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Make predictions for a whole raster — predict_raster","text":"terra::SpatRaster predictions","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_metrics_sf.html","id":null,"dir":"Reference","previous_headings":"","what":"Probability metrics for sf objects — prob_metrics_sf","title":"Probability metrics for sf objects — prob_metrics_sf","text":"tidysdm provides specialised metrics SDMs, help pages (boyce_cont(), kap_max(), tss_max()). Additionally, also provides methods handle sf::sf objects following standard yardstick metrics: yardstick::average_precision() yardstick::brier_class() yardstick::classification_cost() yardstick::gain_capture() yardstick::mn_log_loss() yardstick::pr_auc() yardstick::roc_auc() yardstick::roc_aunp() yardstick::roc_aunu()","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_metrics_sf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Probability metrics for sf objects — prob_metrics_sf","text":"","code":"# S3 method for class 'sf' average_precision(data, ...) # S3 method for class 'sf' brier_class(data, ...) # S3 method for class 'sf' classification_cost(data, ...) # S3 method for class 'sf' gain_capture(data, ...) # S3 method for class 'sf' mn_log_loss(data, ...) # S3 method for class 'sf' pr_auc(data, ...) # S3 method for class 'sf' roc_auc(data, ...) # S3 method for class 'sf' roc_aunp(data, ...) # S3 method for class 'sf' roc_aunu(data, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_metrics_sf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Probability metrics for sf objects — prob_metrics_sf","text":"data sf::sf object ... parameters pass data.frame version metric. 
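(Sketch, with pred_sf a hypothetical sf object holding factor column class and probability column .pred_presence: roc_auc(pred_sf, class, .pred_presence).) 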
See specific man page metric interest.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_metrics_sf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Probability metrics for sf objects — prob_metrics_sf","text":"tibble columns .metric, .estimator, .estimate 1 row values.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_metrics_sf.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Probability metrics for sf objects — prob_metrics_sf","text":"Note roc_aunp roc_aunu multiclass metrics, not relevant SDMs (work binary response). included completeness, class probability metrics yardstick sf method, applications SDMs.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_to_binary.html","id":null,"dir":"Reference","previous_headings":"","what":"simple function to convert probability to binary classes — prob_to_binary","title":"simple function to convert probability to binary classes — prob_to_binary","text":"simple function convert probability binary classes","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_to_binary.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"simple function to convert probability to binary classes — prob_to_binary","text":"","code":"prob_to_binary(x, thresh, class_levels)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_to_binary.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"simple function to convert probability to binary classes — prob_to_binary","text":"x vector probabilities thresh threshold convert binary class_levels binary levels","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/prob_to_binary.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"simple function to convert probability to binary classes — prob_to_binary","text":"vector binary values","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/recipe.sf.html","id":null,"dir":"Reference","previous_headings":"","what":"Recipe for sf objects — recipe.sf","title":"Recipe for sf objects — recipe.sf","text":"method recipes::recipe() handles case x sf::sf object, commonly used Species Distribution Model, generates spatial_recipe.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/recipe.sf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Recipe for sf objects — recipe.sf","text":"","code":"# S3 method for class 'sf' recipe(x, ...) spatial_recipe(x, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/recipe.sf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Recipe for sf objects — recipe.sf","text":"x sf::sf data frame. ... parameters passed recipes::recipe()","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/recipe.sf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Recipe for sf objects — recipe.sf","text":"object class spatial_recipe, derived version recipes::recipe(), see manpage recipes::recipe() details.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/recipe.sf.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Recipe for sf objects — recipe.sf","text":"recipes::recipe() not natively compatible sf::sf objects. 
problem geometry column sf::sf objects list, incompatible translation formulae recipes::recipe(). method strips geometry column data.frame replaces simple X Y columns operations, thus allowing usual processing recipes::recipe() succeed (X Y give role coords spatial recipe). prepping baking spatial_recipe, data.frame tibble without coordinates used training new_data, dummy X Y columns generated filled NAs. NOTE order matters! need use syntax recipe(x=sf_obj, formula=class~.) method successfully detect sf::sf object. Starting formula fail.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/repeat_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Repeat ensemble — repeat_ensemble","title":"Repeat ensemble — repeat_ensemble","text":"ensemble based multiple sets pseudoabsences/background. object collection (list) simple_ensemble objects predictions combined simple way (e.g. taking either mean median). simple_ensemble contains best version given model type following tuning; simple ensembles need metric estimated cv process.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/repeat_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Repeat ensemble — repeat_ensemble","text":"","code":"repeat_ensemble(...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/repeat_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Repeat ensemble — repeat_ensemble","text":"... not used, function just creates empty repeat_ensemble object. Members added add_repeat()","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/repeat_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Repeat ensemble — repeat_ensemble","text":"empty repeat_ensemble","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample background points for SDM analysis — sample_background","title":"Sample background points for SDM analysis — sample_background","text":"function samples background points raster given set presences. locations returned center points sampled cells, can overlap presences (contrast pseudo-absences, see sample_pseudoabs). following methods implemented: 'random': background randomly sampled region covered raster (i.e. not NAs). 'dist_max': background randomly sampled unioned buffers 'dist_max' presences (distances 'm' lonlat rasters, map units projected rasters). Using union buffers means areas multiple buffers oversampled. also referred \"thickening\". 'bias': background points sampled according surface representing biased sampling effort.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample background points for SDM analysis — sample_background","text":"","code":"sample_background( data, raster, n, coords = NULL, method = \"random\", class_label = \"background\", return_pres = TRUE )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample background points for SDM analysis — sample_background","text":"data sf::sf data frame, data frame coordinate variables. can defined coords, unless standard names (see details). 
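(Full-call sketch, with pres a hypothetical sf object of presences and climate a hypothetical terra::SpatRaster: sample_background(data = pres, raster = climate, n = 1000, method = c('dist_max', 50000)).) 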
raster terra::SpatRaster cells sampled (first layer used determine cells NAs, thus cannot sampled). sampling \"biased\", sampling probability proportional values first layer (i.e. band) raster. n number background points sample. coords vector length two giving names \"x\" \"y\" coordinates, found data. left NULL, function try guess columns based standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\"), c(\"lon\", \"lat\"). method sampling method. One 'random', 'dist_max', 'bias'. dist_max, maximum distance set additional element vector, e.g. c('dist_max',70000). class_label label given sampled points. Defaults background return_pres return presences together background single tibble.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample background points for SDM analysis — sample_background","text":"object class tibble::tibble. presences returned, presence level set reference (match expectations yardstick package considers first level event).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Sample background points for SDM analysis — sample_background","text":"Note units distance depend projection raster.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background_time.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample background points for SDM analysis for points with a time point. — sample_background_time","title":"Sample background points for SDM analysis for points with a time point. — sample_background_time","text":"function samples background points raster given set presences. locations returned center points sampled cells, can overlap presences (contrast pseudo-absences, see sample_pseudoabs_time). following methods implemented: 'random': background points randomly sampled region covered raster (i.e. not NAs). 'dist_max': background points randomly sampled unioned buffers 'dist_max' presences (distances 'm' lonlat rasters, map units projected rasters). Using union buffers means areas multiple buffers oversampled. also referred \"thickening\". 'bias': background points sampled according surface representing biased sampling effort. Note surface time step normalised sum 1; use n_per_time_step affect sampling effort within time step.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background_time.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample background points for SDM analysis for points with a time point. — sample_background_time","text":"","code":"sample_background_time( data, raster, n_per_time_step, coords = NULL, time_col = \"time\", lubridate_fun = c, method = \"random\", class_label = \"background\", return_pres = TRUE, time_buffer = 0 )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background_time.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample background points for SDM analysis for points with a time point. — sample_background_time","text":"data sf::sf data frame, data frame coordinate variables. can defined coords, unless standard names (see details). raster terra::SpatRaster terra::SpatRasterDataset cells sampled. terra::SpatRasterDataset, first dataset used define cells valid, i.e. not NAs. 
n_per_time_step number background points sample time step (i.e. vector length equal number time steps raster) coords vector length two giving names \"x\" \"y\" coordinates, found data. left NULL, function try guess columns based standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\"), c(\"lon\", \"lat\") time_col name column time; time lubridate object, use lubridate_fun provide function can used convert appropriately lubridate_fun function convert time column lubridate object method sampling method. One 'random', 'dist_max', 'bias'. class_label label given sampled points. Defaults background return_pres return presences together background single tibble time_buffer buffer time axis around presences defines effect sampling background method 'dist_max'. set zero, presences effect time step assigned raster; positive value, defines number days date provided time column presence considered (e.g. 20 days means presence considered time steps equivalent plus minus twenty days date).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_background_time.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample background points for SDM analysis for points with a time point. — sample_background_time","text":"object class tibble::tibble. presences returned, presence level set reference (match expectations yardstick package considers first level event)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample pseudo-absence points for SDM analysis — sample_pseudoabs","title":"Sample pseudo-absence points for SDM analysis — sample_pseudoabs","text":"function samples pseudo-absence points raster given set presences. locations returned center points sampled cells, can overlap presences (contrast background points, see sample_background). following methods implemented: 'random': pseudo-absences randomly sampled region covered raster (i.e. not NAs). 'dist_min': pseudo-absences randomly sampled region excluding buffer 'dist_min' presences (distances 'm' lonlat rasters, map units projected rasters). 'dist_max': pseudo-absences randomly sampled unioned buffers 'dist_max' presences (distances 'm' lonlat rasters, map units projected rasters). Using union buffers means areas multiple buffers oversampled. also referred \"thickening\". 'dist_disc': pseudo-absences randomly sampled unioned discs around presences two values 'dist_disc' defining minimum maximum distance presences.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample pseudo-absence points for SDM analysis — sample_pseudoabs","text":"","code":"sample_pseudoabs( data, raster, n, coords = NULL, method = \"random\", class_label = \"pseudoabs\", return_pres = TRUE )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample pseudo-absence points for SDM analysis — sample_pseudoabs","text":"data sf::sf data frame, data frame coordinate variables. can defined coords, unless standard names (see details). raster terra::SpatRaster cells sampled n number pseudoabsence points sample coords vector length two giving names \"x\" \"y\" coordinates, found data. 
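(e.g. coords = c(\"longitude\", \"latitude\")). 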
left NULL, function try guess columns based standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\"), c(\"lon\", \"lat\") method sampling method. One 'random', 'dist_min', 'dist_max', 'dist_disc'. Threshold distances set additional elements vector, e.g. c('dist_min',70000) c('dist_disc',50000,200000). class_label label given sampled points. Defaults pseudoabs return_pres return presences together pseudoabsences single tibble","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample pseudo-absence points for SDM analysis — sample_pseudoabs","text":"object class tibble::tibble. presences returned, presence level set reference (match expectations yardstick package considers first level event)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs_time.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample pseudo-absence points for SDM analysis for points with a time point. — sample_pseudoabs_time","title":"Sample pseudo-absence points for SDM analysis for points with a time point. — sample_pseudoabs_time","text":"function samples pseudo-absence points raster given set presences. locations returned center points sampled cells, can overlap presences (contrast background points, see sample_background_time). following methods implemented: 'random': pseudo-absences randomly sampled region covered raster (i.e. not NAs). 'dist_min': pseudo-absences randomly sampled region excluding buffer 'dist_min' presences (distances 'm' lonlat rasters, map units projected rasters). 'dist_max': pseudo-absences randomly sampled unioned buffers 'dist_max' presences (distances 'm' lonlat rasters, map units projected rasters). Using union buffers means areas multiple buffers oversampled. also referred \"thickening\". 'dist_disc': pseudo-absences randomly sampled unioned discs around presences two values 'dist_disc' defining minimum maximum distance presences.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs_time.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample pseudo-absence points for SDM analysis for points with a time point. — sample_pseudoabs_time","text":"","code":"sample_pseudoabs_time( data, raster, n_per_presence, coords = NULL, time_col = \"time\", lubridate_fun = c, method = \"random\", class_label = \"pseudoabs\", return_pres = TRUE, time_buffer = 0 )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs_time.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample pseudo-absence points for SDM analysis for points with a time point. — sample_pseudoabs_time","text":"data sf::sf data frame, data frame coordinate variables. can defined coords, unless standard names (see details). raster terra::SpatRaster terra::SpatRasterDataset cells sampled. terra::SpatRasterDataset, first dataset used define cells valid, i.e. not NAs. n_per_presence number pseudoabsence points sample presence coords vector length two giving names \"x\" \"y\" coordinates, found data. 
left NULL, function try guess columns based standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\"), c(\"lon\", \"lat\") time_col name column time; time lubridate object, use lubridate_fun provide function can used convert appropriately lubridate_fun function convert time column lubridate object method sampling method. One 'random', 'dist_min', 'dist_max', 'dist_disc'. class_label label given sampled points. Defaults pseudoabs return_pres return presences together pseudoabsences single tibble time_buffer buffer time axis around presences defines effect sampling pseudoabsences. set zero, presences effect time step assigned raster; positive value, defines number days date provided time column presence considered (e.g. 20 days means presence considered time steps equivalent plus minus twenty days date).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sample_pseudoabs_time.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample pseudo-absence points for SDM analysis for points with a time point. — sample_pseudoabs_time","text":"object class tibble::tibble. presences returned, presence level set reference (match expectations yardstick package considers first level event)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_metric_set.html","id":null,"dir":"Reference","previous_headings":"","what":"Metric set for SDM — sdm_metric_set","title":"Metric set for SDM — sdm_metric_set","text":"function returns yardstick::metric_set includes boyce_cont(), yardstick::roc_auc() tss_max(), commonly used metrics SDM.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_metric_set.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Metric set for SDM — sdm_metric_set","text":"","code":"sdm_metric_set(...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_metric_set.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Metric set for SDM — sdm_metric_set","text":"... additional metrics added yardstick::metric_set. 
{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_metric_set.html","id":null,"dir":"Reference","previous_headings":"","what":"Metric set for SDM — sdm_metric_set","title":"Metric set for SDM — sdm_metric_set","text":"This function returns a yardstick::metric_set that includes boyce_cont(), yardstick::roc_auc() and tss_max(), the metrics most commonly used in SDM.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_metric_set.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Metric set for SDM — sdm_metric_set","text":"","code":"sdm_metric_set(...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_metric_set.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Metric set for SDM — sdm_metric_set","text":"... additional metrics to be added to the yardstick::metric_set. See the help of yardstick::metric_set() for constraints on the types of metrics that can be mixed.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_metric_set.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Metric set for SDM — sdm_metric_set","text":"A yardstick::metric_set object.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_metric_set.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Metric set for SDM — sdm_metric_set","text":"","code":"sdm_metric_set() #> A metric set, consisting of: #> - `boyce_cont()`, a probability metric | direction: maximize #> - `roc_auc()`, a probability metric | direction: maximize #> - `tss_max()`, a probability metric | direction: maximize sdm_metric_set(accuracy) #> A metric set, consisting of: #> - `boyce_cont()`, a probability metric | direction: maximize #> - `roc_auc()`, a probability metric | direction: maximize #> - `tss_max()`, a probability metric | direction: maximize #> - `accuracy()`, a class metric | direction: maximize"},
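A sketch of where the metric set is typically used: passed as metrics when tuning a workflow_set (lacerta_models, a workflow_set, and lacerta_cv, spatial cross-validation folds, are assumed to exist, as in the package overview article):

# tune every workflow in the set, scoring with the SDM metrics
lacerta_models <- lacerta_models %>%
  workflowsets::workflow_map("tune_grid",
    resamples = lacerta_cv, grid = 3,
    metrics = sdm_metric_set(), verbose = TRUE
  )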
\"none\" tune hyperparameter","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_boost_tree.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model specification for a Boosted Trees model for SDM — sdm_spec_boost_tree","text":"parsnip::model_spec model.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_boost_tree.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model specification for a Boosted Trees model for SDM — sdm_spec_boost_tree","text":"","code":"standard_bt_spec <- sdm_spec_boost_tree() full_bt_spec <- sdm_spec_boost_tree(tune = \"all\") custom_bt_spec <- sdm_spec_boost_tree(tune = \"custom\", mtry = tune())"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":null,"dir":"Reference","previous_headings":"","what":"Model specification for a GAM for SDM — sdm_spec_gam","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"function returns parsnip::model_spec General Additive Model used classifier presences absences Species Distribution Model.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"","code":"sdm_spec_gam(..., tune = \"none\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"... parameters passed parsnip::gen_additive_mod() customise model. See help function details. tune character defining tuning strategy. hyperparameters tune gam, valid option \"none\". 
{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":null,"dir":"Reference","previous_headings":"","what":"Model specification for a GAM for SDM — sdm_spec_gam","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"This function returns a parsnip::model_spec for a General Additive Model to be used as a classifier of presences and absences in a Species Distribution Model.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"","code":"sdm_spec_gam(..., tune = \"none\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"... parameters to be passed to parsnip::gen_additive_mod() to customise the model. See the help of that function for details. tune a character defining the tuning strategy. As there are no hyperparameters to tune in a gam, the only valid option is \"none\". This parameter is present for consistency with other sdm_spec_* functions, but it does nothing in this case.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"a parsnip::model_spec of the model.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"Note that, when using GAMs in a workflow_set(), it is necessary to update the model with gam_formula() (see parsnip::model_formula for a discussion of formulas with special terms in tidymodels):","code":"workflow_set( preproc = list(default = my_recipe), models = list(gam = sdm_spec_gam()), cross = TRUE ) %>% update_workflow_model(\"default_gam\", spec = sdm_spec_gam(), formula = gam_formula(my_recipe))"},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_gam.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model specification for a GAM for SDM — sdm_spec_gam","text":"","code":"my_gam_spec <- sdm_spec_gam()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_glm.html","id":null,"dir":"Reference","previous_headings":"","what":"Model specification for a GLM for SDM — sdm_spec_glm","title":"Model specification for a GLM for SDM — sdm_spec_glm","text":"This function returns a parsnip::model_spec for a Generalised Linear Model to be used as a classifier of presences and absences in a Species Distribution Model.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_glm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model specification for a GLM for SDM — sdm_spec_glm","text":"","code":"sdm_spec_glm(..., tune = \"none\")"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_glm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model specification for a GLM for SDM — sdm_spec_glm","text":"... parameters to be passed to parsnip::logistic_reg() to customise the model. See the help of that function for details. tune a character defining the tuning strategy. As there are no hyperparameters to tune in a glm, the only valid option is \"none\".
This parameter is present for consistency with other sdm_spec_* functions, but it does nothing in this case.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_glm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model specification for a GLM for SDM — sdm_spec_glm","text":"a parsnip::model_spec of the model.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_glm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model specification for a GLM for SDM — sdm_spec_glm","text":"","code":"my_spec_glm <- sdm_spec_glm()"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_maxent.html","id":null,"dir":"Reference","previous_headings":"","what":"Model specification for a MaxEnt for SDM — sdm_spec_maxent","title":"Model specification for a MaxEnt for SDM — sdm_spec_maxent","text":"This function returns a parsnip::model_spec for a MaxEnt model to be used in Species Distribution Models.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_maxent.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model specification for a MaxEnt for SDM — sdm_spec_maxent","text":"","code":"sdm_spec_maxent(..., tune = c(\"sdm\", \"all\", \"custom\", \"none\"))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_maxent.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model specification for a MaxEnt for SDM — sdm_spec_maxent","text":"... parameters to be passed to maxent() to customise the model. See the help of that function for details. tune a character defining the tuning strategy. Valid strategies are: \"sdm\" chooses the hyperparameters that are most important to tune for an sdm (for maxent: 'feature_classes' and 'regularization_multiplier'). \"all\" tunes all hyperparameters (for maxent: 'feature_classes' and 'regularization_multiplier'). \"custom\" passes the options from '...'. \"none\" does not tune any hyperparameter.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_maxent.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model specification for a MaxEnt for SDM — sdm_spec_maxent","text":"a parsnip::model_spec of the model.","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_maxent.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model specification for a MaxEnt for SDM — sdm_spec_maxent","text":"","code":"test_maxent_spec <- sdm_spec_maxent(tune = \"sdm\") test_maxent_spec #> maxent Model Specification (classification) #> #> Main Arguments: #> feature_classes = tune() #> regularization_multiplier = tune() #> #> Computational engine: maxnet #> # setting specific values sdm_spec_maxent(tune = \"custom\", feature_classes = \"lq\") #> maxent Model Specification (classification) #> #> Main Arguments: #> feature_classes = lq #> #> Computational engine: maxnet #>"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_rand_forest.html","id":null,"dir":"Reference","previous_headings":"","what":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","title":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","text":"This function returns a parsnip::model_spec for a Random Forest to be used as a classifier of presences and absences in Species Distribution Models.
It uses the library ranger to fit random forests; to use another library, simply build the parsnip::model_spec directly.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_rand_forest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","text":"","code":"sdm_spec_rand_forest(..., tune = c(\"sdm\", \"all\", \"custom\", \"none\")) sdm_spec_rf(..., tune = c(\"sdm\", \"all\", \"custom\", \"none\"))"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_rand_forest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","text":"... parameters to be passed to parsnip::rand_forest() to customise the model. See the help of that function for details. tune a character defining the tuning strategy. Valid strategies are: \"sdm\" chooses the hyperparameters that are most important to tune for an sdm (for rf: 'mtry'). \"all\" tunes all hyperparameters (for rf: 'mtry', 'trees' and 'min_n'). \"custom\" passes the options from '...'. \"none\" does not tune any hyperparameter.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_rand_forest.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","text":"a parsnip::model_spec of the model.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_rand_forest.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","text":"sdm_spec_rf() is simply a short form for sdm_spec_rand_forest().","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/sdm_spec_rand_forest.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model specification for a Random Forest for SDM — sdm_spec_rand_forest","text":"","code":"test_rf_spec <- sdm_spec_rf(tune = \"sdm\") test_rf_spec #> Random Forest Model Specification (classification) #> #> Main Arguments: #> mtry = tune() #> #> Computational engine: ranger #> # combining tuning with specific values for other hyperparameters sdm_spec_rf(tune = \"sdm\", trees = 100) #> Random Forest Model Specification (classification) #> #> Main Arguments: #> mtry = tune() #> trees = 100 #> #> Computational engine: ranger #>"},
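Several sdm_spec_* models are typically combined into a single workflow_set, as in the package overview article; a sketch assuming my_recipe exists:

models <- list(
  default_glm = sdm_spec_glm(),
  default_rf = sdm_spec_rf(),
  default_gbm = sdm_spec_boost_tree(),
  default_maxent = sdm_spec_maxent()
)
# one workflow per recipe/model combination
wf_set <- workflowsets::workflow_set(
  preproc = list(default = my_recipe),
  models = models,
  cross = TRUE
)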
{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/simple_ensemble.html","id":null,"dir":"Reference","previous_headings":"","what":"Simple ensemble — simple_ensemble","title":"Simple ensemble — simple_ensemble","text":"A simple ensemble is a collection of workflows whose predictions are combined in a simple way (e.g. by taking either the mean or the median). Usually these workflows consist of the best version of a given model algorithm following tuning. The workflows are fitted to the full training dataset before making predictions.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/simple_ensemble.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simple ensemble — simple_ensemble","text":"","code":"simple_ensemble(...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/simple_ensemble.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simple ensemble — simple_ensemble","text":"... not used; this function just creates an empty simple_ensemble object. Members are added with add_best_candidates()","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/simple_ensemble.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simple ensemble — simple_ensemble","text":"an empty simple_ensemble. This is a tibble with columns: wflow_id: the name of the workflows for which the best model was chosen workflow: the trained workflow objects metrics: metrics based on the crossvalidation resampling used to tune the models","code":""},
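A sketch of building an ensemble from a tuned workflow_set (wf_set_tuned is hypothetical; add_member() adds the best candidate of each workflow according to the chosen metric):

ens <- simple_ensemble() %>%
  add_member(wf_set_tuned, metric = "boyce_cont")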
{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/spatial_initial_split.html","id":null,"dir":"Reference","previous_headings":"","what":"Simple Training/Test Set Splitting for spatial data — spatial_initial_split","title":"Simple Training/Test Set Splitting for spatial data — spatial_initial_split","text":"spatial_initial_split creates a single binary split of the data into a training set and a testing set. All strategies from the package spatialsample are available; a random split from that strategy will be used to generate the initial split.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/spatial_initial_split.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simple Training/Test Set Splitting for spatial data — spatial_initial_split","text":"","code":"spatial_initial_split(data, prop, strategy, ...)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/spatial_initial_split.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simple Training/Test Set Splitting for spatial data — spatial_initial_split","text":"data A dataset (data.frame or tibble). prop the proportion of data to be retained for modelling/analysis. strategy a sampling strategy from spatialsample. ... parameters to be passed to the strategy","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/spatial_initial_split.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simple Training/Test Set Splitting for spatial data — spatial_initial_split","text":"An rsplit object that can be used with the rsample::training and rsample::testing functions to extract the data in each split.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/spatial_initial_split.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Simple Training/Test Set Splitting for spatial data — spatial_initial_split","text":"","code":"set.seed(123) block_initial <- spatial_initial_split(boston_canopy, prop = 1 / 5, spatial_block_cv) testing(block_initial) #> Simple feature collection with 153 features and 18 fields #> Geometry type: MULTIPOLYGON #> Dimension: XY #> Bounding box: xmin: 745098 ymin: 2915630 xmax: 805045.8 ymax: 2969840 #> Projected CRS: NAD83 / Massachusetts Mainland (ftUS) #> # A tibble: 153 × 19 #> grid_id land_area canopy_gain canopy_loss canopy_no_change canopy_area_2014 #> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 M-9 2690727. 52443. 53467. 304239. 357706. #> 2 Q-21 2690727. 54712. 101816. 1359305. 1461121. #> 3 AB-23 725043. 13737. 13278. 52628. 65906. #> 4 AC-15 1175032. 24517. 24010. 111148. 135158. #> 5 U-25 2691491. 83740. 117496. 601040. 718536. #> 6 Y-13 2691490. 79215. 41676. 312299. 353975. #> 7 M-10 2578879. 27026. 41240. 161115. 202355. #> 8 T-22 2691490. 80929. 140490. 573628. 714118. #> 9 AO-16 1717547. 64863. 52390. 465563. 517953. #> 10 X-23 2690728. 85198. 109044. 458205. 567249. #> # ℹ 143 more rows #> # ℹ 13 more variables: canopy_area_2019 <dbl>, change_canopy_area <dbl>, #> # change_canopy_percentage <dbl>, canopy_percentage_2014 <dbl>, #> # canopy_percentage_2019 <dbl>, change_canopy_absolute <dbl>, #> # mean_temp_morning <dbl>, mean_temp_evening <dbl>, mean_temp <dbl>, #> # mean_heat_index_morning <dbl>, mean_heat_index_evening <dbl>, #> # mean_heat_index <dbl>, geometry <MULTIPOLYGON [US_survey_foot]> training(block_initial) #> Simple feature collection with 529 features and 18 fields #> Geometry type: MULTIPOLYGON #> Dimension: XY #> Bounding box: xmin: 739826.9 ymin: 2908294 xmax: 812069.7 ymax: 2970073 #> Projected CRS: NAD83 / Massachusetts Mainland (ftUS) #> # A tibble: 529 × 19 #> grid_id land_area canopy_gain canopy_loss canopy_no_change canopy_area_2014 #> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 AB-4 795045. 15323. 3126. 53676. 56802. #> 2 I-33 265813. 8849. 11795. 78677. 90472. #> 3 AO-9 270153 6187. 1184. 26930. 28114. #> 4 H-10 2691490. 73098. 80362. 345823. 426185. #> 5 V-7 107890. 219. 3612. 240. 3852. #> 6 Q-22 2648089. 122211. 154236. 1026632. 1180868. #> 7 X-4 848558. 8275. 1760. 6872. 8632. #> 8 P-18 2690726. 110928. 113146. 915137. 1028283. #> 9 J-29 2574479. 38069. 15530. 2388638. 2404168. #> 10 G-28 2641525. 87024. 39246. 1202528. 1241774. #> # ℹ 519 more rows #> # ℹ 13 more variables: canopy_area_2019 <dbl>, change_canopy_area <dbl>, #> # change_canopy_percentage <dbl>, canopy_percentage_2014 <dbl>, #> # canopy_percentage_2019 <dbl>, change_canopy_absolute <dbl>, #> # mean_temp_morning <dbl>, mean_temp_evening <dbl>, mean_temp <dbl>, #> # mean_heat_index_morning <dbl>, mean_heat_index_evening <dbl>, #> # mean_heat_index <dbl>, geometry <MULTIPOLYGON [US_survey_foot]>"},
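A natural follow-up is to build spatial cross-validation folds on the training portion of the split; a minimal sketch continuing the example above:

library(spatialsample)
set.seed(123)
# block cross-validation with 5 folds on the training data
cv_folds <- spatial_block_cv(rsample::training(block_initial), v = 5)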
{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell.html","id":null,"dir":"Reference","previous_headings":"","what":"Thin point dataset to have 1 observation per raster cell — thin_by_cell","title":"Thin point dataset to have 1 observation per raster cell — thin_by_cell","text":"This function thins a dataset so that only one observation per cell is retained.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Thin point dataset to have 1 observation per raster cell — thin_by_cell","text":"","code":"thin_by_cell(data, raster, coords = NULL, drop_na = TRUE, agg_fact = NULL)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Thin point dataset to have 1 observation per raster cell — thin_by_cell","text":"data An sf::sf data frame, or a data frame with coordinate variables. These can be defined in coords, unless they have standard names (see details below). raster a terra::SpatRaster object that defines the grid. coords a vector of length two giving the names of the \"x\" and \"y\" coordinates, as found in data. If left NULL, the function will try to guess the columns based on the standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\") or c(\"lon\", \"lat\"). drop_na a boolean on whether locations that are NA in the raster should be dropped. agg_fact a positive integer. The aggregation factor, expressed as the number of cells in each direction (horizontally and vertically). Or two integers (horizontal and vertical aggregation factors), or three integers (also aggregating over layers). Defaults to NULL, which implies no aggregation (i.e. the thinning is done on the grid of raster).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Thin point dataset to have 1 observation per raster cell — thin_by_cell","text":"An object of class sf::sf or data.frame, the same as \"data\".","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Thin point dataset to have 1 observation per raster cell — thin_by_cell","text":"Further thinning can be achieved by aggregating cells in the raster before thinning, by setting agg_fact > 1 (aggregation works in a manner equivalent to terra::aggregate()).","code":""},
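A minimal sketch (presences, an sf data frame, and climate_raster, a terra::SpatRaster, are hypothetical):

# keep at most one observation per cell of the raster
thinned <- thin_by_cell(presences, raster = climate_raster)
# thin on a coarser grid by aggregating 2 cells in each direction
thinned_coarse <- thin_by_cell(presences, raster = climate_raster, agg_fact = 2)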
{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell_time.html","id":null,"dir":"Reference","previous_headings":"","what":"Thin point dataset to have 1 observation per raster cell per time slice — thin_by_cell_time","title":"Thin point dataset to have 1 observation per raster cell per time slice — thin_by_cell_time","text":"This function thins a dataset so that only one observation per cell per time slice is retained. We use a raster with layers as time slices to define the data cube on which thinning is enforced (see details below on how time should be formatted).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell_time.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Thin point dataset to have 1 observation per raster cell per time slice — thin_by_cell_time","text":"","code":"thin_by_cell_time( data, raster, coords = NULL, time_col = \"time\", lubridate_fun = c, drop_na = TRUE, agg_fact = NULL )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell_time.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Thin point dataset to have 1 observation per raster cell per time slice — thin_by_cell_time","text":"data An sf::sf data frame, or a data frame with coordinate variables. These can be defined in coords, unless they have standard names (see details below). raster a terra::SpatRaster object that defines the grid, with layers corresponding to the time slices (times should be set as either POSIXlt or \"years\", see terra::time() for details), or a terra::SpatRasterDataset, in which case the first dataset will be used (again, with the times of that dataset set as either POSIXlt or \"years\" with terra::time()). coords a vector of length two giving the names of the \"x\" and \"y\" coordinates, as found in data. If left NULL, the function will try to guess the columns based on the standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\") or c(\"lon\", \"lat\"). time_col the name of the column with the time; if time is not a lubridate object, use lubridate_fun to provide a function that can be used to convert it appropriately. lubridate_fun a function to convert the time column into a lubridate object. drop_na a boolean on whether locations that are NA in the raster should be dropped. agg_fact a positive integer. The aggregation factor, expressed as the number of cells in each direction (horizontally and vertically). Or two integers (horizontal and vertical aggregation factors), or three integers (also aggregating over layers). Defaults to NULL, which implies no aggregation (i.e. the thinning is done on the grid of raster).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell_time.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Thin point dataset to have 1 observation per raster cell per time slice — thin_by_cell_time","text":"An object of class sf::sf or data.frame, the same as \"data\".","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_cell_time.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Thin point dataset to have 1 observation per raster cell per time slice — thin_by_cell_time","text":"Further spatial thinning can be achieved by aggregating cells in the raster before thinning, by setting agg_fact > 1 (aggregation works in a manner equivalent to terra::aggregate()).","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist.html","id":null,"dir":"Reference","previous_headings":"","what":"Thin points dataset based on geographic distance — thin_by_dist","title":"Thin points dataset based on geographic distance — thin_by_dist","text":"This function thins a dataset so that only observations that have a distance from each other greater than \"dist_min\" are retained.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Thin points dataset based on geographic distance — thin_by_dist","text":"","code":"thin_by_dist(data, dist_min, coords = NULL)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Thin points dataset based on geographic distance — thin_by_dist","text":"data An sf::sf data frame, or a data frame with coordinate variables. These can be defined in coords, unless they have standard names (see details below). dist_min the minimum distance between points (in units appropriate for the projection, or meters for lonlat data). coords a vector of length two giving the names of the \"x\" and \"y\" coordinates, as found in data. If left NULL, the function will try to guess the columns based on the standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\") or c(\"lon\", \"lat\").","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Thin points dataset based on geographic distance — thin_by_dist","text":"An object of class sf::sf or data.frame, the same as \"data\".","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Thin points dataset based on geographic distance — thin_by_dist","text":"Distances are measured in the appropriate units for the projection used. In the case of raw latitude and longitude (e.g. as provided in a data.frame), the crs is set to WGS84, and units are set to meters. This function is a modified version of the algorithm in spThin, adapted to work on sf objects.","code":""},
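A minimal sketch (presences is a hypothetical sf data frame; km2m() is the package helper converting kilometres to metres):

# retain only points that are at least 20 km apart
thinned <- thin_by_dist(presences, dist_min = km2m(20))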
{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist_time.html","id":null,"dir":"Reference","previous_headings":"","what":"Thin points dataset based on geographic and temporal distance — thin_by_dist_time","title":"Thin points dataset based on geographic and temporal distance — thin_by_dist_time","text":"This function thins a dataset so that only observations that have a distance from each other greater than \"dist_min\" in space and \"interval_min\" in time are retained.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist_time.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Thin points dataset based on geographic and temporal distance — thin_by_dist_time","text":"","code":"thin_by_dist_time( data, dist_min, interval_min, coords = NULL, time_col = \"time\", lubridate_fun = c )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist_time.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Thin points dataset based on geographic and temporal distance — thin_by_dist_time","text":"data An sf::sf data frame, or a data frame with coordinate variables. These can be defined in coords, unless they have standard names (see details below). dist_min the minimum distance between points (in units appropriate for the projection, or meters for lonlat data). interval_min the minimum time interval between points, in days. coords a vector of length two giving the names of the \"x\" and \"y\" coordinates, as found in data. If left NULL, the function will try to guess the columns based on the standard names c(\"x\", \"y\"), c(\"X\",\"Y\"), c(\"longitude\", \"latitude\") or c(\"lon\", \"lat\"). time_col the name of the column with the time; if time is not a lubridate object, use lubridate_fun to provide a function that can be used to convert it appropriately. lubridate_fun a function to convert the time column into a lubridate object.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist_time.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Thin points dataset based on geographic and temporal distance — thin_by_dist_time","text":"An object of class sf::sf or data.frame, the same as \"data\".","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/thin_by_dist_time.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Thin points dataset based on geographic and temporal distance — thin_by_dist_time","text":"Geographic distances are measured in the appropriate units for the projection used. In the case of raw latitude and longitude (e.g. as provided in a data.frame), the crs is set to WGS84, and units are set to meters. The time interval is estimated in days. Note that, for long time periods, the simple conversion x years = 365 * x days might lead to slightly shorter intervals than expected, as it ignores leap years. The function y2d() provides a closer approximation. This function is an algorithm analogous to spThin, with the exception that neighbours are defined in terms of both space and time.","code":""},
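A hedged sketch of thinning in space and time (fossil_points, with a time_bp column in years BP, is hypothetical; pastclim::ybp2date converts years BP to lubridate dates and y2d() converts years to days):

# keep points at least 100 km and 2000 years apart
thinned <- thin_by_dist_time(
  fossil_points,
  dist_min = km2m(100),
  interval_min = y2d(2000),
  time_col = "time_bp",
  lubridate_fun = pastclim::ybp2date
)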
{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tidysdm-package.html","id":null,"dir":"Reference","previous_headings":"","what":"tidysdm: Species Distribution Models with Tidymodels — tidysdm-package","title":"tidysdm: Species Distribution Models with Tidymodels — tidysdm-package","text":"Fit species distribution models (SDMs) using the 'tidymodels' framework, which provides a standardised interface to define models and process their outputs. 'tidysdm' expands 'tidymodels' by providing methods for spatial objects, models and metrics specific to SDMs, as well as a number of specialised functions to process occurrences for contemporary and palaeo datasets. The full functionalities of the package are described in Leonardi et al. (2023) doi:10.1101/2023.07.24.550358 .","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tidysdm-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"tidysdm: Species Distribution Models with Tidymodels — tidysdm-package","text":"Maintainer: Andrea Manica am315@cam.ac.uk Authors: Michela Leonardi Margherita Colucci Andrea Vittorio Pozzi Eleanor M.L. Scerri","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss.html","id":null,"dir":"Reference","previous_headings":"","what":"TSS - True Skill Statistics — tss","title":"TSS - True Skill Statistics — tss","text":"The True Skill Statistic, defined as sensitivity + specificity - 1.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"TSS - True Skill Statistics — tss","text":"","code":"tss(data, ...) # S3 method for class 'data.frame' tss( data, truth, estimate, estimator = NULL, na_rm = TRUE, case_weights = NULL, event_level = \"first\", ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"TSS - True Skill Statistics — tss","text":"data Either a data.frame containing the columns specified by the truth and estimate arguments, or a table/matrix where the true class results should be in the columns of the table. ... Not currently used. truth The column identifier for the true class results (that is a factor). This should be an unquoted column name, although this argument is passed by expression and supports quasiquotation (you can unquote column names). For _vec() functions, a factor vector. estimate The column identifier for the predicted class results (that is also a factor). As with truth, this can be specified in different ways, but the primary method is to use an unquoted variable name. For _vec() functions, a factor vector. estimator One of: \"binary\", \"macro\", \"macro_weighted\", or \"micro\" to specify the type of averaging to be done. \"binary\" is only relevant for the two class case. The other three are general methods for calculating multiclass metrics. The default will automatically choose \"binary\" or \"macro\" based on estimate. na_rm A logical value indicating whether NA values should be stripped before the computation proceeds. case_weights The optional column identifier for case weights. This should be an unquoted column name that evaluates to a numeric column in data. For _vec() functions, a numeric vector. event_level A single string. Either \"first\" or \"second\" to specify which level of truth to consider as the \"event\". This argument is only applicable when estimator = \"binary\". The default is \"first\".","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"TSS - True Skill Statistics — tss","text":"A tibble with columns .metric, .estimator, and .estimate and 1 row of values. For grouped data frames, the number of rows returned will be the same as the number of groups.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"TSS - True Skill Statistics — tss","text":"The True Skill Statistic is defined as sensitivity + specificity - 1. This function is a wrapper around yardstick::j_index(), which is another name for the same quantity. Note that this function takes the classes as predicted by the model without any calibration (i.e. making a split at 0.5 probability).
This is usually not the metric used for Species Distribution Models, where the threshold is recalibrated to maximise the TSS; for that purpose, use tss_max().","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"TSS - True Skill Statistics — tss","text":"","code":"# Two class data(\"two_class_example\") tss(two_class_example, truth, predicted) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> <chr> <chr> <dbl> #> 1 tss binary 0.673 # Multiclass library(dplyr) data(hpc_cv) # Groups are respected hpc_cv %>% group_by(Resample) %>% tss(obs, pred) #> # A tibble: 10 × 4 #> Resample .metric .estimator .estimate #> <chr> <chr> <chr> <dbl> #> 1 Fold01 tss macro 0.434 #> 2 Fold02 tss macro 0.422 #> 3 Fold03 tss macro 0.533 #> 4 Fold04 tss macro 0.449 #> 5 Fold05 tss macro 0.431 #> 6 Fold06 tss macro 0.413 #> 7 Fold07 tss macro 0.398 #> 8 Fold08 tss macro 0.468 #> 9 Fold09 tss macro 0.435 #> 10 Fold10 tss macro 0.412"},
{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss_max.html","id":null,"dir":"Reference","previous_headings":"","what":"Maximum TSS - True Skill Statistics — tss_max","title":"Maximum TSS - True Skill Statistics — tss_max","text":"The True Skill Statistic, defined as sensitivity + specificity - 1.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss_max.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Maximum TSS - True Skill Statistics — tss_max","text":"","code":"tss_max(data, ...) # S3 method for class 'data.frame' tss_max( data, truth, ..., estimator = NULL, na_rm = TRUE, event_level = \"first\", case_weights = NULL ) # S3 method for class 'sf' tss_max(data, ...) tss_max_vec( truth, estimate, estimator = NULL, na_rm = TRUE, event_level = \"first\", case_weights = NULL, ... )"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss_max.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Maximum TSS - True Skill Statistics — tss_max","text":"data Either a data.frame containing the columns specified by the truth and estimate arguments, or a table/matrix where the true class results should be in the columns of the table. ... A set of unquoted column names or one or more dplyr selector functions to choose which variables contain the class probabilities. If truth is binary, only 1 column should be selected, and it should correspond to the value of event_level. Otherwise, there should be as many columns as factor levels of truth, and the ordering of the columns should be the same as the factor levels of truth. truth The column identifier for the true class results (that is a factor). This should be an unquoted column name, although this argument is passed by expression and supports quasiquotation (you can unquote column names). For _vec() functions, a factor vector. estimator One of \"binary\", \"hand_till\", \"macro\", or \"macro_weighted\" to specify the type of averaging to be done. \"binary\" is only relevant for the two class case. The others are general methods for calculating multiclass metrics. The default will automatically choose \"binary\" if truth is binary, \"hand_till\" if truth has >2 levels and case_weights isn't specified, or \"macro\" if truth has >2 levels and case_weights is specified (in which case \"hand_till\" isn't well-defined). na_rm A logical value indicating whether NA values should be stripped before the computation proceeds. event_level A single string. Either \"first\" or \"second\" to specify which level of truth to consider as the \"event\". This argument is only applicable when estimator = \"binary\". The default uses an internal helper that generally defaults to \"first\". case_weights The optional column identifier for case weights. This should be an unquoted column name that evaluates to a numeric column in data. For _vec() functions, a numeric vector. estimate If truth is binary, a numeric vector of class probabilities corresponding to the \"relevant\" class. Otherwise, a matrix with as many columns as factor levels of truth. It is assumed that these are in the same order as the levels of truth.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss_max.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Maximum TSS - True Skill Statistics — tss_max","text":"A tibble with columns .metric, .estimator, and .estimate and 1 row of values. For grouped data frames, the number of rows returned will be the same as the number of groups.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss_max.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Maximum TSS - True Skill Statistics — tss_max","text":"The True Skill Statistic is defined as sensitivity + specificity - 1. This function calibrates the probability threshold used to classify presences so as to maximise the TSS. There is no multiclass version of this function; it only operates on binary predictions (e.g. presences and absences in SDMs).","code":""},{"path":[]},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/tss_max.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Maximum TSS - True Skill Statistics — tss_max","text":"","code":"tss_max(two_class_example, truth, Class1) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> <chr> <chr> <dbl> #> 1 tss_max binary 0.728"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/y2d.html","id":null,"dir":"Reference","previous_headings":"","what":"Convert a time interval from years to days — y2d","title":"Convert a time interval from years to days — y2d","text":"This function takes a time interval in years and converts it into days, the unit commonly used in time operations in R. The simple conversion x * 365 does not work for large numbers of years, due to the presence of leap years.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/y2d.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Convert a time interval from years to days — y2d","text":"","code":"y2d(x)"},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/y2d.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Convert a time interval from years to days — y2d","text":"x the number of years of the interval","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/y2d.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Convert a time interval from years to days — y2d","text":"a difftime object (in days)","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/reference/y2d.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Convert a time interval from years to days — y2d","text":"","code":"y2d(1) #> Time difference of 365 days y2d(1000) #> Time difference of 365243 days"},
{"path":"https://evolecolgroup.github.io/tidysdm/dev/news/index.html","id":"tidysdm-095","dir":"Changelog","previous_headings":"","what":"tidysdm 0.9.5","title":"tidysdm 0.9.5","text":"CRAN release: 2024-06-23 Implement clamping and MESS to manage extrapolation. Clearly separate the sampling of background points vs pseudo-absences. Update the vignettes.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/news/index.html","id":"tidysdm-094","dir":"Changelog","previous_headings":"","what":"tidysdm 0.9.4","title":"tidysdm 0.9.4","text":"CRAN release: 2024-03-05 Fix the predict* functions, which prevented a fixed threshold from being used to assign classes. Ensure compatibility with upcoming changes in tune.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/news/index.html","id":"tidysdm-093","dir":"Changelog","previous_headings":"","what":"tidysdm 0.9.3","title":"tidysdm 0.9.3","text":"CRAN release: 2024-01-17 Fix a bug in filter_high_cor due to changes in terra 1.6.75. Implement collect_metrics for ensembles.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/news/index.html","id":"tidysdm-092","dir":"Changelog","previous_headings":"","what":"tidysdm 0.9.2","title":"tidysdm 0.9.2","text":"CRAN release: 2023-11-13 Release on CRAN.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/news/index.html","id":"tidysdm-091","dir":"Changelog","previous_headings":"","what":"tidysdm 0.9.1","title":"tidysdm 0.9.1","text":"Add a spatial_recipe class. This is a BREAKING change that makes previously saved objects unusable, but old code will work as expected. Additional articles showing how to use additional tidymodels features, and how to debug errors. Integration with DALEX to explain models. New functions to select variables.","code":""},{"path":"https://evolecolgroup.github.io/tidysdm/dev/news/index.html","id":"tidysdm-090","dir":"Changelog","previous_headings":"","what":"tidysdm 0.9.0","title":"tidysdm 0.9.0","text":"Initial release on GitHub.","code":""}]