From 3aebe9a25cf1fe1d567d3faf8dd14016df8a5aa2 Mon Sep 17 00:00:00 2001 From: Elizabeth Wenk Date: Thu, 16 Nov 2023 11:34:12 +1100 Subject: [PATCH] Remove unneeded files (#785) * remove file not required in austraits.build * Recover deleted files * Update to run --------- Co-authored-by: Daniel Falster --- config/traits.build_v000.owl | 8748 ----------------------- inst/support/austraits.build_schema.yml | 419 -- inst/support/report_dataset.Rmd | 809 --- tests/build/helper.R | 5 +- tests/build/test-axrxiv_constancy.R | 6 +- tests/functions.R | 126 + tests/testthat.R | 4 - tests/testthat/.gitignore | 2 - 8 files changed, 130 insertions(+), 9989 deletions(-) delete mode 100644 config/traits.build_v000.owl delete mode 100644 inst/support/austraits.build_schema.yml delete mode 100644 inst/support/report_dataset.Rmd create mode 100644 tests/functions.R delete mode 100644 tests/testthat.R delete mode 100644 tests/testthat/.gitignore diff --git a/config/traits.build_v000.owl b/config/traits.build_v000.owl deleted file mode 100644 index 42e788d9c..000000000 --- a/config/traits.build_v000.owl +++ /dev/null @@ -1,8748 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - editor preferred term - - The concise, meaningful, and human-friendly name for a class or property preferred by the ontology developers. (US-English) - PERSON:Daniel Schober - - editor preferred term - - - - - - - - example of usage - - example of usage - - - - - - - - - has curation status - - has curation status - has curation status - - - - - - - - definition - - definition - textual definition - - - - - - - - term editor - - term editor - - - - - - - - definition source - - definition source - - - - - - - - imported from - For external terms/classes, the ontology from which the term was imported - - imported from - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - This property is intended to be used with non-literal values. This property is an inverse property of References. 
- A related resource that references, cites, or otherwise points to the described resource. - - Is Referenced By - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - An object property permitting specification of the identifier scheme used to provide the identifier for an entity – either a funder, a person or a resource – defined as an individual of the class datacite:IdentifierScheme. - - - - - - - - - organised in class - organized In Class - - - - - - - - is related to - A is related to B iff there is some relation between A and B. - - is related to - - - - - - - - - - - - - - - - - - is referred to by - A is referred to by B iff B is an informational entity that makes reference to A. - - is referred to by - - - - - - - - - - - - - - is annotation of - is annotation of is a relation between some textual entity and the entity that it annotates. - - is annotation of - - - - - - - - refers to - refers to is a relation between one entity and the entity that it makes reference to. - - refers to - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - propertyOf - Indicating the class to which a data property refers. - propertyOf - - - - - - - - - - - - - - comment - comment - - - - - - - - - - - - - - - - - deprecated - The annotation property that indicates that a given entity has been deprecated. - http://www.w3.org/2002/07/owl - deprecated - - - - - - - - - - - - - - - - The annotation property that provides version information for an ontology or another OWL construct. - versionInfo - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Literal - - - - - - - - - - - - - - characteristic for - The characteristicFor property gives the entity of the characteristic. 
- - characteristic for - - - - - - - - - - entity related - The entityRelated property asserts that the entity of one observation has an observed relationship to the entity in another observation. - - entity related - - - - - - - - has base qualifier - The hasBaseQualifier together with the hasNextQualfiier property allows qualifier characteristics to be combined and ordered. For example, for an MinimumDailyAverage qualifier, the hasBaseQualifier refers to Minimum and the hasNextQualifier refers to a DailyAverage qualifier, and for a DailyAverage qualifier, the hasBaseQualifier refers to a Daily qualifier and the hasNextQualifier refers to an Average qualifier. - has Base Qualifier - - - - - - - - - - - - - - - - - has context - The hasContext property asserts that one observation serves as the context for another observation. In a hasContext property the domain is the observation being contextualized and the range is the context. Context defines a semantic relationship between two entities that is a fundamental aspect of the observations, but not necessarily of the entities themselves. For example, most measurements are accomplished in a spatio-temporal framework that might be valuable context. The assertions made by contextual observations are assumed of the contextualized observations. Context is a transitive relationship. - - has context - - - - - - - - - has measurement - The hasMeasurement property gives the measurements of the observed entity. - - has measurement - - - - - - - - - - has member - The hasMember property gives the observations of an observation collection. - - has member - - - - - - - - - - has observed relation - The hasObservedRelation property gives the observed relation of an observation. The observation entity is the source (head) entity of the corresponding relationship that was observed. 
- - has observed relation - - - - - - - - has target unit - The hasTargetUnit property gives the target unit being converted to within a unit conversion. - - has target unit - - - - - - - - - measurement for - The measurementFor property gives the observation (observed entitiy) that a measurement is associated with. Each measurement is for exactly one observation. - - measurement for - - - - - - - - measures characteristic - The measuresCharacteristic property gives the Characteristic that would be measured by a MeasurementType. - http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl - measures characteristic - - - - - - - - - measures entity - The measuresEntity property gives the Entity that would be measured by a MeasurementType. - - - - - - - - - - - - of entity - The ofEntity property gives the observed entity of an observation. - - of entity - - - - - - - - standard for - - standard for - - - - - - - - uses method - The usesMethod property gives the actual method used to carry out the measurement. - - uses method - - - - - - - - uses standard - The usesStandard property gives the standard (e.g., unit) used in a measurement. 
- - uses standard - - - - - - - - - curator note - - curator note - - - - - - - - - term replaced by - - term replaced by - - - - - - - - location of - a relation between two independent continuants, the location and the target, in which the target is entirely within the location - - location of - - - - - - - - located in - a relation between two independent continuants, the target and the location, in which the target is entirely within the location - - located in - - - - - - - - - ends after - X ends_after Y iff: end(Y) before_or_simultaneous_with end(X) - http://purl.obolibrary.org/obo/RO.owl - ends after - - - - - - - - - happens during - X happens_during Y iff: (start(Y) before_or_simultaneous_with start(X)) AND (end(X) before_or_simultaneous_with end(Y)) - - happens during - - - - - - - - - overlaps - x overlaps y if and only if there exists some z such that x has part z and z part of y - - overlaps - - - - - - - - - temporally related to - A relation that holds between two occurrents. This is a grouping relation that collects together all the Allen relations. - - temporally related to - - - - - - - - - existence starts during - x existence starts during y if and only if the time point at which x starts is after or equivalent to the time point at which y starts and before or equivalent to the time point at which y ends. Formally: x existence starts during y iff α(x) &gt;= α(y) &amp; α(x) &lt;= ω(y). - http://purl.obolibrary.org/obo/RO.owl - existence starts during - - - - - - - - subject - A topic of the resource. - Recommended practice is to refer to the subject with a URI. If this is not possible or feasible, a literal value that identifies the subject may be provided. Both should preferably refer to a subject in a controlled vocabulary. - - subject - - - - - - - - - - - - - - - - is measurement value of - is measurement value of is a relation between a value and the entity that it is a measurement of. 
- - is measurement value of - - - - - - - - - - - - - - - - - - - - is specialization of - is specialization of is a relation between a more specific instance (in terms of spatial/temporal localization & other attributres) than the other. - - is specialization of - - - - - - - - - - - - - - - references - indicates B is used as a source of information for A - http://vocab.fairdatacollective.org/gdmt/ - References - - - - - - - - reviews - indicates that A is a review of B - - Reviews - - - - - - - - - - - - - - - - - - - - - - - - - - has contributor identifier scheme - The name of the scheme or authority used for the Contributor Identifier. - - has Contributor Identifier Scheme - - - - - - - - has contributor info - - has Contributor Info - Information about a Contributor entity that helped bring into existence the dataset being described. Contributors can be people, organizations and/or physical or virtual infrastructure (e.g., sensors, software). - - - - - - - - - has contributor name info - Information about a contributor name. - - has Contributor Name Info - - - - - - - - has contributor role - - has Contributor Role - The role of the contributor in bringing the described dataset into existence. - - - - - - - - - has contributor type - - has Contributor Type - The type of the contributor of the described dataset (organization or person). - - - - - - - - is temporal region of - - is temporal region of - - - - - - - - - - - - - - - - has resource type - Indicates the resource type of a citation. - - has resource type - - - - - - - - - - - - - - - - - - - 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - complementOf - - - - - - - - - - - disjointUnionOf - - - - - - - - - - - - disjointWith - - - - - - - - - - has key (owl) - hasKey - - - - - - - - - - - hasSelf - - - - - - - - - has value (owl) - hasValue - - - - - - - - - - on class (owl) - The property that determines the class that a qualified object cardinality restriction refers to. 
- http://www.w3.org/2002/07/owl - onClass - - - - - - - - - - - onDataRange - - - - - - - - - - - The property that determines the datatype that a datatype restriction refers to. - - onDatatype - - - - - - - - - - - onProperties - - - - - - - - - - - The property that determines the property that a property restriction refers to. - - onProperty - - - - - - - - - - - - - - - - - - - someValuesFrom - - - - - - - - topObjectProperty - - - - - - - - - - with restrictions - - with Restrictions - - - - - - - - - - - - - - - - - - - has broader match - - has broader match - skos:broadMatch is used to state a hierarchical mapping link between two conceptual resources in different concept schemes. - - - - - - - - - - has broader - Broader concepts are typically rendered as parents in a concept hierarchy (tree). - - has broader - Relates a concept to a concept that is more general in meaning. - By convention, skos:broader is only used to assert an immediate (i.e. direct) hierarchical link between two conceptual resources. - - - - - - - - - - - - - - - - - - - - - - - - - - - - close match - - has close match - skos:closeMatch is used to link two concepts that are sufficiently similar that they can be used interchangeably in some information retrieval applications. In order to avoid the possibility of "compound errors" when combining mappings across more than two concept schemes, skos:closeMatch is not declared to be a transitive property. - - - - - - - - - - - - - - - - - - - - - - - - - - - has exact match - skos:exactMatch is disjoint with each of the properties skos:broadMatch and skos:relatedMatch. - - has exact match - skos:exactMatch is used to link two concepts, indicating a high degree of confidence that the concepts can be used interchangeably across a wide range of information retrieval applications. skos:exactMatch is a transitive property, and is a sub-property of skos:closeMatch. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - is in mapping relation with - These concept mapping relations mirror semantic relations, and the data model defined below is similar (with the exception of skos:exactMatch) to the data model defined for semantic relations. A distinct vocabulary is provided for concept mapping relations, to provide a convenient way to differentiate links within a concept scheme from links between concept schemes. However, this pattern of usage is not a formal requirement of the SKOS data model, and relies on informal definitions of best practice. - - is in mapping relation with - Relates two concepts coming, by convention, from different schemes, and that have comparable meanings - - - - - - - - - - - - - - - - has narrower match - - has narrower match - skos:narrowMatch is used to state a hierarchical mapping link between two conceptual resources in different concept schemes. - - - - - - - - - has narrower - Narrower concepts are typically rendered as children in a concept hierarchy (tree). - - has narrower - Relates a concept to a concept that is more specific in meaning. - By convention, skos:broader is only used to assert an immediate (i.e. direct) hierarchical link between two conceptual resources. - - - - - - - - - - has narrower transitive - - has narrower transitive - skos:narrowerTransitive is a transitive superproperty of skos:narrower. - By convention, skos:narrowerTransitive is not used to make assertions. Rather, the properties can be used to draw inferences about the transitive closure of the hierarchical relation, which is useful e.g. when implementing a simple query expansion algorithm in a search application. - - - - - - - - - - - - - - - - - - - - - - - - has related - skos:related is disjoint with skos:broaderTransitive - - has related - Relates a concept to a concept with which there is an associative semantic relationship. 
- - - - - - - - - - - has related match - - has related match - skos:relatedMatch is used to state an associative mapping link between two conceptual resources in different concept schemes. - - - - - - - - - - - - - - - - - - - - - - - - - top concept of - is top concept in scheme - - top concept of - - - - - - - - - A Variable has an Entity whose Property is observed. - hasObjectOfInterest - - - - - - - - - A Variable has a Property that characterizes an Entity. - hasProperty - - - - - - - - - - - - - - - - - - - - location_name - The location name used by the data contributor or a generic location name designated by AusTraits when one is not provided. - - - study location name - - - - - - - - - - given_name - A given name, in Western contexts often referred to as a first name, is a personal name that specifies and differentiates between members of a group of individuals, especially in a family, all of whose members usually share the same family name (surname). A given name is purposefully given, usually by a child's parents at or near birth, in contrast to an inherited one such as a family name. - - givenName - - - - - - - - - - last_name - A family name (in Western contexts often referred to as a surname or last name) is typically a part of a person's name which has been passed, according to law or custom, from one or both parents to their children. - - family name - - - - - - - - - - - - - - - - - - - - - conforms_to - An established standard to which the described resource conforms. - conforms to - - - - - - - - - date created - Date trait was added to the trait dictionary. - - created - - - - - - - - - - - - - - - - - identifier - Recommended practice is to identify the resource by means of a string conforming to an identification system. Examples include International Standard Book Number (ISBN), Digital Object Identifier (DOI), and Uniform Resource Name (URN). Persistent identifiers should be provided as HTTP URIs. 
- An unambiguous reference to the resource within a given context. - - Identifier - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - An abstract of the work. - has abstract - - - - - - - - - - - Usually the address of the publisher or other type of institution. For major publishing houses, van Leunen recommends omitting the information entirely. For small publishers, on the other hand, you can help the reader by giving the complete address. - has address - - - - - - - - - - - The authors affiliation. - has affiliation - - - - - - - - - - - An annotation. It is not used by the standard bibliography styles, but may be used by others that produce an annotated bibliography. - has annotation - - - - - - - - - - - author - Names of all the authors for the reference. (AusTraits) - This is tricky due to the fact that order is not (generally) preserved in RDF documents. The problem arises when you want to have an author list where the order is _extremely_ important. How shall we do that? Perhaps we want to define "hasPrimaryAuthor", "hasSecondaryAuthor", "hasTertiaryAuthor", and "hasRemainingAuthors", or something of that sort. This will be have to given more thought. - The name(s) of the author(s), in the format described in the LaTeX book. - - has author - - - - - - - - - - - booktitle - Title of a book, part of which is being cited. See the LaTeX book for how to type titles. For book entries, use the title field instead. - - has booktitle - - - - - - - - - - - A chapter (or section or whatever) number. - has chapter - - - - - - - - - - - A Table of Contents. - has contents - - - - - - - - - - - Copyright information. - has copyright - - - - - - - - - - - The database key of the entry being cross referenced. - has crossref - - - - - - - - - - - edition - The edition of a book--for example, "Second". 
This should be an ordinal, and should have the first letter capitalized, as shown here; the standard styles convert to lower case when necessary. - - has edition - - - - - - - - - - - editor - Again, the same issues that arose with the "hasAuthor" property apply here. - Name(s) of editor(s), typed as indicated in the LaTeX book. If there is also an author field, then the editor field gives the editor of the book or collection in which the reference appears. - - has editor - - - - - - - - - - - isbn - The International Standard Book Number of a book or report. (AusTraits) - The International Standard Book Number. - has ISBN - - - - - - - - - - - The International Standard Serial Number. Used to identify a journal. - has ISSN - - - - - - - - - - - institution - The institution that published or sponsored the report or thesis. (AusTraits) - This could be an object property that refers to an external set of institution instances. - The sponsoring institution of a technical report. - has institution - - - - - - - - - - - journal - This could optionally be an object property, whereby the range would refer to an external set of journal instances, thus providing standardized abbreviations for different bibliographic styles. - Journal in which the article was published. (AusTraits) - A journal name. Abbreviations are provided for many journals; see the Local Guide - has journal - - - - - - - - - - - key - The key is used to identify the exact reference using authors last name and year of publication. (AusTraits) - The key for a particular bibTeX entry. Note that the rdf:ID for each Entry instance could be the bibTeX key as well, possibly making this property redundant. - has key - - - - - - - - - - - Key words used for searching or possibly for annotation. - - has keywords - - - - - - - - - - - The Library of Congress Call Number. - has LCCN - - - - - - - - - - - The language the document is in. 
- has language - - - - - - - - - - - place - The location where the reference was written or published. (AusTraits) - A location associated with the entry, such as the city in which a conference took place. - has location - - - - - - - - - - - The month in which the work was published or, for an unpublished work, in which it was written. You should use the standard three-letter abbreviation, as described in Appendix B.1.3 of the LaTeX book. - has month - - - - - - - - - - - The Mathematical Reviews number. - has mrnumber - - - - - - - - - - - hasNote - Additional notes for the reference which are not contained in the other fields. (AusTraits) - Any additional information that can help the reader. The first word should be capitalized. - has note - - - - - - - - - - - number - The issue number for a journal article. - The number of a journal, magazine, technical report, or of a work in a series. An issue of a journal or magazine is usually identified by its volume and number; the organization that issues a technical report usually gives it a number; and sometimes books are given numbers in a named series. - has number - - - - - - - - - - - The organization that sponsors a conference or that publishes a manual. - has organization - - - - - - - - - - - pages - The page numbers for a reference. (AusTraits) - One or more page numbers or range of numbers, such as 42-111 or 7,41,73-97 or 43+ (the `+' in this last example indicates pages following that don't form a simple range). To make it easier to maintain Scribe-compatible databases, the standard styles convert a single dash (as in 7-33) to the double dash used in TeX to denote number ranges (as in 7-33). - has pages - - - - - - - - - - - The price of the document. - has price - - - - - - - - - - - publisher - This is a case where an ObjectProperty might be a better choice, where the range is some set of publisher names defined in another ontology. 
That would allow all of the metadata for the publisher to be incorporated as needed. - The name of the publisher. (AusTraits) - The publisher's name. - has publisher - - - - - - - - - - - As with "hasPublisher", this could be an ObjectProperty that refers to an external set of school instances. - The name of the school where a thesis was written. - has school - - - - - - - - - - - series - The name of a series or set of books. When citing an entire book, the the title field gives its title and an optional series field gives the name of a series or multi-volume set in which the book is published. - has series - - - - - - - - - - - The physical dimensions of a work. - has size - - - - - - - - - - - title - The title of the reference. (AusTraits) - The work's title, typed as explained in the LaTeX book. - has title - - - - - - - - - - - type - The type of thesis which can include PhD, Masters, Honours. (AusTraits) - The type of a technical report--for example, "Research Note". - has type - - - - - - - - - - - url - The URL of of a web page. - The WWW Universal Resource Locator that points to the item being referenced. This often is used for technical reports to point to the ftp or web site where the postscript source of the report is located. - has URL - - - - - - - - - - - volume - The volume number of the article or book. (AusTraits) - The volume of a journal or multivolume book. - has volume - - - - - - - - - - - year - The year that the reference was published, or written in the case of unpublished articles. (AusTraits) - The year of publication or, for an unpublished work, the year it was written. Generally it should consist of four numerals, such as 1984, although the standard styles can handle any year whose last four nonpunctuation characters are numerals, such as '(about 1984)'. - has year - - - - - - - - - - - How something strange has been published. The first word should be capitalized. 
- how published - - - - - - - - - - - A generic human creator category, necessary in order to contain both author and editor. - human creator - - - - - - - - - - A generic property to hold page and/or chapter data. - page and/or chapter data - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - reviewers - The cited entity presents statements, ideas or conclusions that are reviewed by the citing entity. - - - - is reviewed by - - - - - - - - - - - - - - - accepted_name_usage - `Tamias minimus` (valid name for Eutamias minimus). - The full scientific name, with authorship and date information if known, of the accepted (botanical) or valid (zoological) name in cases where the provided scientificName is considered by the reference indicated in the accordingTo property, or of the content provider, to be a synonym or misapplied name. When applied to an Organism or Occurrence, this term should be used in cases where a content provider regards the provided scientificName to be inconsistent with the taxonomic perspective of the content provider. For example, there are many discrepancies within specimen collections and observation datasets between the recorded name (e.g., the most recent identification from an expert who examined a specimen, or a field identification for an observed organism), and the name asserted by the content provider to be taxonomically accepted. - - The full name, with authorship and date information if known, of the currently valid (zoological) or accepted (botanical) taxon. - - Accepted Name Usage - - - - - - - - - - - - - 1 - - - - accepted_name_usage_id - `tsn:41107` (ITIS), `urn:lsid:ipni.org:names:320035-2` (IPNI), `2704179` (GBIF), `6W3C4` (COL) - This term should be used for synonyms or misapplied names to refer to the taxonID of a Taxon record that represents the accepted (botanical) or valid (zoological) name. For Darwin Core Archives the related record should be present locally in the same archive. 
- - An identifier for the name usage (documented meaning of the name according to a source) of the currently valid (zoological) or accepted (botanical) taxon. - - Accepted Name Usage ID - - - - - - - - - - - - - - - - basis_of_record - `field`, `field_experiment`, `captive_cultivated`, `lab`, `preserved_specimen`, `literature` - A categorical variable specifying from which kind of specimen traits were recorded. - - Basis of Record - - - - - - - - - - - - - - - The full scientific name of the class in which the taxon is classified. - 2018-03-21 - - Examples: "Mammalia", "Hepaticopsida". For discussion see http://terms.tdwg.org/wiki/dwc:class - Class - class - - - - - - - - - - - - - - - `Denmark`, `Colombia`, `España` - The name of the country or major administrative unit in which the Location occurs. Recommended best practice is to use a controlled vocabulary such as the Getty Thesaurus of Geographic Names. - 2017-07-07 - - recommended - Examples: "Germany", "Denmark", "Colombia", "España". For discussion see http://terms.tdwg.org/wiki/dwc:country; - The name of the country or major administrative unit in which the Location occurs. - country - - - - - - - - - - - - - 1 - - - - dataset_id - `Falster_2003` - Primary identifier for each study contributed to AusTraits; most often these are scientific papers, books, or online resources. By default this should be the name of the first author and year of publication, e.g. `Falster_2005`. - Primary identifier for each study contributed to AusTraits; most often these are scientific papers, books, or online resources. By default this should be the name of the first author and year of publication. - - - - - An identifier for the set of data. May be a global unique identifier or an identifier specific to a collection or institution. 
(from ETS) - - datasetID - Dataset ID - - - - - - - - - - Day - day - - - - - - - - - - - - - - - latitude (deg) - `-41.0983423` - The geographic latitude (in decimal degrees, using the spatial reference system given in geodeticDatum) of the geographic center of a Location. Positive values are north of the Equator, negative values are south of it. Legal values lie between -90 and 90, inclusive. - 2017-07-07 - - Example: "-41.0983423". For discussion see http://terms.tdwg.org/wiki/dwc:decimalLatitude - - Decimal Latitude - decimalLatitude - - - - - - - - - - - - - - - longitude (deg) - `-121.1761111` - The geographic longitude (in decimal degrees, using the spatial reference system given in geodeticDatum) of the geographic center of a Location. Positive values are east of the Greenwich Meridian, negative values are west of it. Legal values lie between -180 and 180, inclusive. - 2017-07-07 - - Example: "-121.1761111". For discussion see http://terms.tdwg.org/wiki/dwc:decimalLongitude - - Decimal Longitude - decimalLongitude - - - - - - - - - - - - - - - establishment_means - `native`, `nativeReintroduced`, `introduced`, `introducedAssistedColonisation`, `vagrant`, `uncertain` - Recommended best practice is to use controlled value strings from the controlled vocabulary designated for use with this term, listed at http://rs.tdwg.org/dwc/doc/em/. For details, refer to https://doi.org/10.3897/biss.3.38084 - - Statement about whether an organism or organisms have been introduced to a given place and time through the direct or indirect activity of modern humans. - - Establishment Means - - - - - - - - - - - - - - - collection_date - 1963-03-08T14:07-0600 (8 Mar 1963 at 2:07pm in the time zone six hours earlier than UTC). 2009-02-20T08:40Z (20 February 2009 8:40am UTC). 2018-08-29T15:19 (3:19pm local time on 29 August 2018). 1809-02-12 (some time during 12 February 1809). 1906-06 (some time in June 1906). 1971 (some time in the year 1971). 
2007-03-01T13:00:00Z/2008-05-11T15:30:00Z (some time during the interval between 1 March 2007 1pm UTC and 11 May 2008 3:30pm UTC). 1900/1909 (some time during the interval between the beginning of the year 1900 and the end of the year 1909). 2007-11-13/15 (some time in the interval between 13 November 2007 and 15 November 2007). - Date sample was taken, in the format `yyyy-mm-dd`, `yyyy-mm` or `yyyy`, depending on the resoluton specified. Alternatively an overall range for the study can be indicating, with the starting and ending sample date sepatated by a `/`, as in 2010-10/2011-03. (AusTraits) - The date-time or interval during which an Event occurred. For occurrences, this is the date-time when the event was recorded. Not suitable for a time in a geological context. - - Recommended best practice is to use a date that conforms to ISO 8601-1:2019. - Event Date - - - - - - - - - - - - - - - family - Examples: "Felidae", "Monocleaceae". For discussion see http://terms.tdwg.org/wiki/dwc:family - The full scientific name of the family in which the taxon is classified. - 2018-03-21 - - - Family - - - - - - - - - - - - - - - The full scientific name of the genus in which the taxon is classified. - 2018-03-21 - - Examples: "Puma", "Monoclea". For discussion see http://terms.tdwg.org/wiki/dwc:genus - - Genus - genus - - - - - - - - geodetic_datum - Examples: "EPSG:4326", "WGS84", "NAD27", "Campo Inchauspe", "European 1950", "Clarke 1866". For discussion see http://terms.tdwg.org/wiki/dwc:geodeticDatum - The ellipsoid, geodetic datum, or spatial reference system (SRS) upon which the geographic coordinates given in decimalLatitude and decimalLongitude as based. Recommended best practice is use the EPSG code as a controlled vocabulary to provide an SRS, if known. Otherwise use a controlled vocabulary for the name or code of the geodetic datum, if known. Otherwise use a controlled vocabulary for the name or code of the ellipsoid, if known. 
If none of these is known, use the value "unknown". - - http://rs.tdwg.org/dwc/terms/ - Geodetic Datum - geodeticDatum - - - - - - - - - - - - - - - habitat - `oak savanna`, `pre-cordilleran steppe` - - A category or description of the habitat in which the Event occurred. - - Habitat - - - - - - - - - - - - - - - replicates - If measurement is an aggregate measure of multiple individuals or specimens, report number of specimens as count, i.e. integer number. Defaults to 1 (ETS) - Number of replicate measurements that comprise a recorded trait measurement. (AusTraits) - - A numeric value (or range) is ideal and appropriate if the value type is a `mean`, `median`, `min` or `max`. For these value types, if replication is unknown the entry should be `unknown`. If the value type is `raw_value` the replicate value should be 1. If the trait is categorical or the value indicates a measurement for an entire species (or other taxon) replicate value should be `.na`. (AusTraits) - Examples: "1", "25". For discussion see http://terms.tdwg.org/wiki/dwc:individualCount - Individual Count - - - - - - - - - - - - - - - infraspecific_epithet - `concolor` (for scientificName "Puma concolor concolor"), `oxyadenia` (for scientificName "Quercus agrifolia var. oxyadenia"), `laxa` (for scientificName "Cheilanthes hirta f. laxa"), `scaberrima` (for scientificName "Indigofera charlieriana var. scaberrima"). - In botany, where there can be more than one infraspecific rank, name strings may be provided, in literature and in identifications, that have more than two epithets. Only the last of these epithets is the infraspecificEpithet and only the first and the last epithets belong to the scientificName. For example: the infraspecificEpithet in the string "Indigofera charlieriana subsp. sessilis var. scaberrima" is `scaberrima` and the scientificName is `Indigophera charlieriana var. scaberrima`. 
- - The name of the lowest or terminal infraspecific epithet of the scientificName, excluding any rank designation. - - Infraspecific Epithet - - - - - - - - - - - - - - - kingdom - The full scientific name of the kingdom in which the taxon is classified. - 2017-07-07 - 2018-03-21 - - Examples: "Animalia", "Plantae". For discussion see http://terms.tdwg.org/wiki/dwc:kingdom - - Kingdom - kingdom - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - life_stage - `adult`, `juvenile`, `sapling`, `seedling` - A field to indicate the life stage or age class of the entity measured. Standard values are `adult`, `sapling`, `seedling` and `juvenile`. (AusTraits) - The age class or life stage of the Organism(s) at the time the Occurrence was recorded. (DarwinCore) - - - Life Stage - - - - - - - - - - - - - 1 - - - - - location_id - `https://opencontext.org/subjects/768A875F-E205-4D0B-DE55-BAB7598D0FD1` - A unique integer identifier for a location, with locations numbered sequentially within a dataset. The identifier links to specific information in the location table. (AusTraits) - An identifier for the set of location information (data associated with dcterms:Location). May be a global unique identifier or an identifier specific to the data set. - 2017-07-07 - - An identifier for the set of location information (data associated with dcterms:Location). May be a global unique identifier or an identifier specific to the data set. - Could report the plot within the experimental setting which would be further specified in the metadata or in a separate dataset. For discussion see http://terms.tdwg.org/wiki/dwc:locationID - - locationID - - - - - - - - - methods - A textual description of the methods used to collect the trait data. Whenever available, methods are taken near-verbatim from the referenced source. 
Methods can include descriptions such as 'measured on botanical collections', 'data from the literature', or a detailed description of the field or lab methods used to collect the data. (AusTraits) - The method or protocol used to determine the measurement, fact, characteristic, or assertion.(DarwinCore) - - Terms in the dwciri namespace are intended to be used in RDF with non-literal objects. - - Measurement Method - - - - - - - - - - - - - - - measurement_remarks - Brief comments or notes accompanying the trait measurement. (AusTraits) - Comments or notes accompanying the MeasurementOrFact. (DarwinCore) - - - Measurement Remarks - - - - - - - - - - - - - - - - - - - - value - The value of the measurement, fact, characteristic, or assertion. - - Measurement Value - - - - - - - - - - Month - month - - - - - - - - - nomenclatural_source - `ICN`, `ICZN`, `BC`, `ICNCP`, `BioCode` - Recommended best practice is to use a controlled vocabulary. - - - The nomenclatural code (or codes in the case of an ambiregnal name) under which the scientificName is constructed. - - Nomenclatural Code - - - - - - - - - - sampling_strategy - A written description of how study locations were selected and how study individuals were selected. When available, this information is lifted verbatim from a published manuscript. For preserved specimens, this field ideally indicates which records were 'sampled' to measure a specific trait. (AusTraits) - The methods or protocols used during an Event, denoted by an IRI. (DarwinCore) - - - sampling Protocol - - - - - - - - - - - - - - - scientific_name - `Coleoptera` (order). `Vespertilionidae` (family). `Manis` (genus). `Ctenomys sociabilis` (genus + specificEpithet). `Ambystoma tigrinum diaboli` (genus + specificEpithet + infraspecificEpithet). `Roptrocerus typographi (Györfi, 1952)` (genus + specificEpithet + scientificNameAuthorship), `Quercus agrifolia var. oxyadenia (Torr.) J.T. 
Howell` (genus + specificEpithet + taxonRank + infraspecificEpithet + scientificNameAuthorship). - This term should not contain identification qualifications, which should instead be supplied in the IdentificationQualifier term. When applied to an Organism or Occurrence, this term should be used to represent the scientific name that was applied to the associated Organism in accordance with the Taxon to which it was or is currently identified. - - The full scientific name, with authorship and date information if known. When forming part of an Identification, this should be the name in lowest level taxonomic rank that can be determined. This term should not contain identification qualifications, which should instead be supplied in the IdentificationQualifier term. - - Scientific Name - - - - - - - - - scientific_name_authorship - `(Torr.) J.T. Howell`, `(Martinovský) Tzvelev`, `(Györfi, 1952)` - - - The authorship information for the scientificName formatted according to the conventions of the applicable nomenclaturalCode. - - Scientific Name Authorship - - - - - - - - - - - - - 1 - - - - - scientific_name_id - `urn:lsid:ipni.org:names:37829-1:1.3` - An identifier for the nomenclatural (not taxonomic) details of a scientific name. - - - Scientific Name ID - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - `female`, `male`, `hermaphrodite` - Recommended best practice is to use a controlled vocabulary. - - The sex of the biological individual(s) represented in the Occurrence. - - Sex - - - - - - - - - - - - - 1 - - - - - taxon_concept_id - `8fa58e08-08de-4ac1-b69c-1235340b7001` - A taxon concept is a reference which is made to a taxon name along with a publication which explains how the author intends for the name to be applied. - An identifier for the taxonomic concept to which the record refers - not for the nomenclatural details of a taxon. 
- - http://rs.tdwg.org/dwc/terms/ - Taxon Concept ID - - - - - - - - - - - - - 1 - - - - - taxon_id - `8fa58e08-08de-4ac1-b69c-1235340b7001`, `32567`, `https://www.gbif.org/species/212` - An identifier for the set of taxon information (data associated with the Taxon class). May be a global unique identifier or an identifier specific to the data set. - 2017-07-07 - - recommended - Examples: "GBIF Backbone Taxonomy:497924", "8fa58e08-08de-4ac1-b69c-1235340b7001", "32567", "http://species.gbif.org/abies_alba_1753", "urn:lsid:gbif.org:usages:32567". For discussion see http://terms.tdwg.org/wiki/dwc:taxonID - - Taxon ID - taxonID - - - - - - - - - taxon_rank - `subspecies`, `varietas`, `forma`, `species`, `genus` - The taxonomic rank of the most specific name in the scientificName. Recommended best practice is to use a controlled vocabulary. - 2017-07-07 - 2018-03-21 - - - recommended - This is to clarify cases where information is not given on a species level. Examples: "subspecies", "varietas", "forma", "species", "genus". For discussion see http://terms.tdwg.org/wiki/dwc:taxonRank - - taxonRank - Taxon Rank - - - - - - - - - taxonomic_status - `invalid`, `misapplied`, `homotypic synonym`, `accepted` - - - The status of the use of the scientificName as a label for a taxon. Requires taxonomic opinion to define the scope of a taxon. Rules of priority then are used to define the taxonomic status of the nomenclature contained in that scope, combined with the experts opinion. It must be linked to a specific taxonomic reference that defines the concept. - - Taxonomic Status - - - - - - - - - - - - - - - elevation - The original description of the elevation (altitude, usually above sea level) of the Location. 
- - http://rs.tdwg.org/dwc/terms/ - Verbatim Elevation - - - - - - - - - - - - - - - - - - Year - year - - - - - - - - - - - - - - - - - - - - - - - - - - - - - database_author - 2018-05-29 - - author - - - - - - - - - - - - - - - comments - Details and Examples for clarification of the trait definition. - - Can contain definition of trait levels or requirements for standardized conditions. - - comments - - - - - - - - - - - - - - - - - - - - - database_conforms_to - Literal reference (incl. version or date issued) and/or DOI of data standard that the dataset applies. Example: "Ecological Trait-data Standard Vocabulary, v0.9.1, URL: https://terminologies.gfbio.org/terms/ets/pages/, DOI: 10.5281/zenodo.1485739" - An established standard to which the described resource conforms. - - - conformsTo - - - - - - - - - metadata$description - A 1-2 sentence description of the purpose of the study. (AusTraits) - An account of the resource. (ETS) - - - datasetDescription - - - - - - - - - - - - - - - units - The unit expected for measurement entries. - 2017-10-24 - 2017-11-15 - - recommended - Only applies to numerical traits. Should be given in SI units. - - expectedUnit - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - database_issued - Date of formal issuance (e.g., publication) of the resource. Recommended best practice is to use an encoding scheme, such as ISO 8601:2004(E). - - - database issued date - issued - - - - - - - - - - - - - - - - - - - - - database_license - Examples: "http://creativecommons.org/publicdomain/zero/1.0/legalcode", "http://creativecommons.org/licenses/by/4.0/legalcode". For discussion see http://terms.tdwg.org/wiki/dwc:license - A legal document giving official permission to do something with the resource. - - - license - - - - - - - - - - - - - - - allowed_values_max - An upper boundary for accepted numerical values. - 2017-09-27 - - required - Is used for eliminating invalid data.
It specifies the maximum meaningful value for a numeric trait. - May be used for eliminating invalid data. This boundary may constrain a range of values of meaningful orders of magnitude, or constrain entries to positive values. - - maxAllowedValue - maxAllowedValue - - - - - - - - - - - - - - - allowed_values_min - A lower boundary for accepted numerical values. - 2017-09-27 - - required - Is used for eliminating invalid data. It specifies the minimum meaningful value for a numeric trait. - May be used for eliminating invalid data. This boundary may constrain a range of values of meaningful orders of magnitude. - - minAllowedValue - minAllowedValue - - - - - - - - - - - - - - - - - - - - - database_rights - Information about rights held in and over the resource. - 2019-03-25 - - recommended - Typically, rights information includes a statement about various property rights associated with the resource, including intellectual property rights. - rights - - - - - - - - - - - - - - - - - - - - - database_rights_holder - A person or organization owning or managing rights over the resource, i.e. this dataset. - - rightsHolder - - - - - - - - - - - - - - - description - A short, unambiguous definition of the trait as used in the specific study context; may refer to a method of measurement; may copy the description of a public trait ontology; - 2017-09-27 - 2018-05-29 - - recommended - The definition should make use of terms provided by existing public ontologies, e.g. 'the mass (PATO:mass), either fresh or dried, of a fruit (PO:fruit)' - - traitDescription - traitDescription - - - - - - - - - - - - - 1 - - - - trait_id - Unique identifier of the trait according to a public ontology, or a user-provided thesaurus of traits.
- 2018-11-01 - - recommended - Examples: TO:0000181; TOP103; http://top-thesaurus.org/annotationInfo?viz=1&&trait=Seed_mass; - - traitID - - - - - - - - - - - - - - - - - - - 1 - - - - traits$value - A measurement value is a quantitative or qualitative result of a measurement. Measurement values can contain a coded representation of the result, e.g., as a number or string. (ETS) - The measured value of a trait. (AusTraits) - 2019-03-25 - - traitValue - - - - - - - - - - - - - - - type - Type of trait values. Possible entries are 'numeric', 'integer', 'categorical', 'ordinal', 'logical', or 'character'. - 2017-09-27 - 2017-11-15 - - recommended - Numerical values represent measurements of length, volumes, ratios, rates or timespans. Integer values apply to count data (e.g. eggs per clutch). Binary data (encoded as 0 or 1) or logical data (coded as TRUE or FALSE) may apply to qualitative traits such as specific behavior during mating (e.g. are territories defended) or specialization to a given habitat (e.g. species restricted to relicts of primeval forests). Categorical traits should define a constrained set of factor levels, such as sex differences in wing morphology (both sexes winged, both sexes unwinged, only males winged, only females winged) or unconstrained entries such as color. Ordinal categorical traits may be better encoded as integer values, e.g. a logical sequence as in the case of life stages or hibernation stages, or habitat preference traits such as horizontal stratum use. - - valueType - - - - - - - - - - - - - - - - - - - - - database_version - The version of the dataset. - 2017-09-05 - 2018-05-29 - - recommended - Can be a date or numeric versioning scheme, e.g. following semantic versioning https://semver.org/ - version - - - - - - - - - - - - - - - - - - - - - - - - - - - additional_role - Additional roles of data collector, mostly contact person. 
- - additional_role - - - - - - - - - - - - - - - basis_of_value - - A categorical variable describing how the trait value was obtained. - - basis of value - - - - - - - - - - - - - - - - - - - - - binomial - The species-level taxon name match for an original name. This column is assigned `na` for taxon names that are at a broader taxonomic_resolution. - - - binomial - - - - - - - - - - - - - - - - - - - - - measured_characteristic - Keywords pertaining to what categorical or numeric property is measured, such as whether the measurement is a length, volume, duration of time, or shape. - - - measured characteristic - - - - - - - - - - notes - - - Additional notes pertaining to the data collector. - contributor_notes - - - - - - - - - austraits$contributors - A table of people contributing to each study. - - - - - - - - - - - - - - - - - - - - - - - - - deprecated_trait_name - Name this trait concept has had in previous versions of the database. - - - deprecated trait name - - - - - - - - - - - - - 1 - - - - - entity_context_id - A unique integer identifier indicating specific contextual properties of an entity, possibly including the entity's sex or caste. - - - entity_context_id - - - - - - - - - - - - - - - - - - - - - 1 - - - - genus taxon id - Taxon_id identifier for the genus associated with a name. - - - taxon id for genus - - - - - - - - - - - doi - The digital object identifier for the resource. - - has DOI - - - - - - - - - - - - - 1 - - - - - individual_id - A unique integer identifier for an individual, with individuals numbered sequentially within each dataset by taxon by population grouping. Most often each row of data represents an individual, but in some datasets trait data collected on a single individual is presented across multiple rows of data, such as if the same trait is measured using different methods or the same individual is measured repeatedly across time.
- - - traits.build - individual_id - - - - - - - - - - - - - 1 - - - - - - - - - - - - - - - link_id - Variable indicating which identifier column in the traits table contains the specified `link_vals`. - - - link to context identifier categories - - - - - - - - - - - - - 1 - - - - - - - - - - - - - - link_vals - Unique integer identifiers that link between identifier columns in the `traits` table and the contextual properties/values in the `contexts` table. - - - link to context identifier values - - - - - - - - - - - - - - - - **terms appearing in metadata input files but not output still to be added** - - - - - - - - - - - - - 1 - - - - - method_id - A unique integer identifier indicating a trait is measured multiple times on the same entity, with different methods used for each entry. This field is only used if a single trait is measured using multiple methods within the same dataset. The identifier links to specific information in the context table. - - - - method_id - - - - - - - - - - - - - 1 - - - - - - - - - - - - - - - - 1 - - - - - plot_id - A unique integer identifier for a plot, where a plot is a distinct collection of organisms within a single geographic location, such as plants growing on different aspects or blocks in an experiment. The identifier links to specific information in the context table. - - - plot_id - - - - - - - - - - - - - 1 - - - - - population_id - A unique integer identifier for a population, where a population is defined as individuals growing in the same location (location_id /location_name) and plot (a context; identified by plot_id) and being subjected to the same treatment (a context; identified by treatment_id). - - - population_id - - - - - - - - - - - - - - - - - - - - - 1 - - - - - source_id - For datasets that are compilations, an identifier for the original data source. - - - source_id - - - - - - - - - - - 1 - - - - - - - - - - - - - - - - - - taxon_distribution - Known distribution of the taxon. 
- 2022-10-31 - - - taxonDistribution - - - - - - - - - - - - - - - taxon_name - Scientific name of the taxon on which traits were sampled, without authorship. When possible, this is the currently accepted (botanical) or valid (zoological) scientific name, but might also be a higher taxonomic level. - - taxon name - - - - - - - - - taxonomic reference - Name of the taxonomy (tree) that contains this concept. i.e. APC, AusMoss etc. - - - - taxonomic reference - - - - - - - - - taxonomic resolution - The rank of the most specific taxon name (or scientific name) to which a submitted original name resolves. - - - - taxonomic resolution - - - - - - - - - - - - - 1 - - - - - temporal_id - A unique integer identifier assigned where repeat observations are made on the same individual (or population, or taxon) across time. The identifier links to specific information in the context table. - - - temporal_id - - - - - - - - - - - - - - - - - - - - - tissue_entity - Indication of what tissue(s) (or organ) is being measured for a given trait. This is a hierarchical list defined in the accompanying trait dictionary/thesaurus. - - - tissue entity - - - - - - - - - - - - - - - trait_applicability - Verbal description of the scope of the trait, specifying groups of taxa where the trait can be used. - - - trait applicability - - - - - - - - - - - - - - - trait_category - Broader trait groupings of which a given trait is a member. This is a hierarchical list defined in the accompanying trait dictionary/thesaurus. - - - trait category - - - - - - - - - - - - - - - references - Key sources for trait name and definition. - - - References - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - trait_label - Preferred trait label. - - - trait label - - - - - - - - - - - - - - - - trait_links_close_match - Links to similar traits in other databases. - - - trait links close match - - - - - - - - - - - - - - - - - trait_links_exact_match - Links to identical traits in other databases.
- - - trait links exact match - - - - - - - - - - - - - - - - - trait_links_related_match - Links to related traits in other databases. - - - trait links related match - - - - - - - - - - - - - 1 - - - - trait_name - Name of the trait sampled. - - - trait name - - - - - - - - - - - - - 1 - - - - - treatment_id - A unique integer identifier for a treatment, where a treatment is any experimental manipulation to an organism's growing/living conditions. The identifier links to specific information in the context table. - - - treatment_id - - - - - - - - - - - - - - - trinomial - The infraspecific taxon name match for an original name. This column is assigned `na` for taxon names that are at a broader taxonomic_resolution. - - - trinomial - - - - - - - - - - - - - - - - - - - - - - - - - - - - - contexts$category - The category of contextual data. - - Options are `plot` (a distinct collection of organisms within a single geographic location, such as plants growing on different aspects or blocks in an experiment), `treatment` (an experimental treatment), `entity_context` (contextual information to record about the entity that isn't documented elsewhere, including the entity's sex, caste), `temporal` (indicating when repeat observations are made on the same individual (or population, or taxon) across time) and `method` (indicating the same trait was measured on the same individual (or population, or taxon) using multiple methods). - - context category - - - - - - - - - - - - - 1 - - - - - - - 1 - - - - - - - 1 - - - - - - - 1 - - - - - - - 1 - - - - - - - 1 - - - - contexts$value - The measured value of a context property. - - http://www.semanticweb.org/traits.build - context property value - - - - - - - - - - - - - 1 - - - - locations$value - The measured value of a location property.
- - http://www.semanticweb.org/traits.build - location property value - - - - - - - - - cardinality - - - - - - - - - - maxCardinality - - - - - - - - - - maxQualifiedCardinality - - - - - - - - - - minCardinality - - - - - - - - - - minQualifiedCardinality - - - - - - - - - - qualifiedCardinality - - - - - - - - topDataProperty - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - anatomical_entity - A part of a cellular organism that is either an immaterial entity or a material entity with granularity above the level of a protein complex. Or, a substance produced by a cellular organism with granularity above the level of a protein complex. (CARO) - Following BFO, material anatomical entities may have immaterial parts (the lumen of your stomach is part of your stomach). The granularity limit follows the limits set by the Gene Ontology on the granularity limit for GO:cellular_component. Note that substances produced by an organism (sweat, feaces, urine) do not need to be part of an organism to qualify as an anatomical structure. - - Anatomical Entity - - - - - - - - - multi_tissue_structure - Anatomical structure that has as its parts two or more portions of tissue of at least two different types and which through specific morphogenetic processes forms a single distinct structural unit demarcated by bona-fide boundaries from other distinct structural units of different types. - - Multi-Tissue Structure - - - - - - - - - - - - - - - individual - An individual life form. - Measured value comes from a single individual. - - Organism - - - - - - - - - - - - - - - population - A set of organisms of the same species within a defined area. - Measured value represents a summary statistic from multiple individuals at a single location. 
(AusTraits) - - Population - - - - - - - - - temporal_entity - - Temporal Entity - - - - - - - - - - Time Datum - - - - - - - - - value_type - A base qualifier denotes a simple, atomic qualifier such as average, minimum, and maximum. (OBOE) - A categorical variable describing the statistical nature of the trait value recorded. (AusTraits) - - - Base Characteristic Qualifier - - - - - - - - - - - - - - trait_measured - A characteristic represents a property of an entity that can be measured (e.g., height, length, or color). A characteristic of an entity is observed through a measurement, which further asserts a value of the characteristic for the entity. A characteristic type (e.g., "height") can be associated with many different entities, whereas an individual characteristic (a particular occurrence of the "height" characteristic) is associated to exactly one entity. - - Characteristic - - - - - - - - Characteristic Qualifier - A measured characteristic of an entity is one that is measurable within the physical world. A measured characteristic includes primary and derived physical dimensions (e.g., length, mass, area, density). The measured value assigned to a measured characteristic may be a quantity (a numerical value) or a quality (a category), including both nominal and ordinal categories. - - Characteristic Qualifier - - - - - - - - oboe-core:Entity - entity - An entity is an object (e.g., a tree, a community, an ecological process). Entities constitute the foci of observations, i.e., every observation is of exactly one entity. - - Entity - - - - - - - - - An identifying characteristic is used to identify or name an entity either globally or within a context, such as a name of a lake or a tag number assigned to a tree. Unlike a MeasuredCharacteristic, an IdentifyingCharacteristic is assigned and not measured. - Identifying Characteristic - - - - - - - - - A measured characteristic of an entity is one that is measurable within the physical world. 
A measured characteristic includes primary and derived physical dimensions (e.g., length, mass, area, density). The measured value assigned to a measured characteristic may be a quantity (a numerical value) or a quality (a category), including both nominal and ordinal categories. - Measured Characteristic - - - - - - - - - - - - - - measured_value - A measurement value is a quantitative or qualitative result of a measurement. Measurement values can contain a coded representation of the result, e.g., as a number or string. - - Measured Value - - - - - - - - - - - - - - - - - - 1 - - - - - - - 1 - - - - - - - 1 - - - - - - - - measurement - A measurement is an assertion that a characteristic of an entity had a particular value with respect to an observation event. A measurement is comprised of a characteristic, a value, a measurement standard, and a protocol. Measurements can also have precision as well as a description of the methods used. Measurements can encapsulate characteristics that were recorded, but not necessarily measured in a physical sense. For example, the name of a location and a taxon can be captured through measurements. - - Measurement - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - oboe-core:Observation - observation - An Observation is a collection of measurements made at a single point in time. In AusTraits it is the subset of rows within the traits data that are made on the same entity and share the same temporal_id. - An observation is an assertion that an entity (e.g., biological organisms, geographic locations, or environmental features, among others) was observed by an observer. An observation primarily serves to group a set of measurements together into a single "observation event". Observations are often made within a broader context. 
The context of an observation is given by other observations, implying that an observed entity (and specifically each corresponding measurement) contextualizes another observed entity (and its corresponding measurements). For example, an observation associated with a location may serve as context for an observation associated with an organism. In this case, the observed characteristic values of the location (such as humidity) are assumed constant for the corresponding measurements of the organism. - - Observation - - - - - - - - - oboe-core:ObservationCollection - observation_collection - An observation collection is a container for a set of observations. - - Observation Collection - - - - - - - - standard - A standard defines a reference for comparing or naming entities via a measurement. A standard can be defined intentionally (e.g., as in the case of units) or extensionally (by listing the values of the standard, e.g., for color this might be red, blue, yellow, etc). - - Standard - - - - - - - - units - A unit is a standard quantification for physical measurements. A unit is either a base unit, a composite unit, or a derived unit. - - Unit - - - - - - - - - organ - A unique macroscopic (gross) anatomic structure that performs specific functions. It is composed of various tissues. An organ is part of an anatomic system or a body region. Representative examples include the heart, lung, liver, spleen, and uterus. - - - - - - - - - tissue - tissue - - - - - - - - - cell - A material entity of anatomical origin [part of or deriving from an organism] that has as its parts a maximally connected cell compartment surrounded by a plasma membrane. - cell - - - - - - - - dataset - A data file and corresponding metadata collected for a single study by a single team. (AusTraits) - A data item that is an aggregate of other data items of the same type that have something in common. Averages and distributions can be determined for data sets. 
- - data set - - - - - - - - identifier - An identifier is an information content entity that is the outcome of a dubbing process and is used to refer to one instance of entity shared by a group of people to refer to that individual entity. - - identifier - - - - - - - - - - - - - - method - The manner, procedure or technique by which a morphological or physiological state or property in a single individual or sample or a group of individuals or samples is assessed and a quantitative or qualitative value assigned. - - measurement method - - - - - - - - organism - - organism - - - - - - - - - - - - - - - - - - - - - - location - A spatial region or named place. - - Location - - - - - - - - - - - - - - source - A bibliographic reference for the resource. (dcterms) - Recommended practice is to include sufficient bibliographic detail to identify the resource as unambiguously as possible. - - bibliographic citation - - - - - - - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - An article from a journal or magazine. - Article - - - - - - - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - A book with an explicit publisher. - Book - - - - - - - - - - - - 1 - - - A work that is printed and bound, but without a named publisher or sponsoring institution. - Booklet - - - - - - - - - - The same as INPROCEEDINGS, included for Scribe compatibility. - Conference - - - - - - - - bibtype_entry - Base class for all entries - - Entry - - - - - - - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - A part of a book, which may be a chapter (or section or whatever) and/or a range of pages. - Inbook - - - - - - - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - A part of a book having its own title. - Incollection - - - - - - - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - An article in a conference proceedings. - Inproceedings - - - - - - - - - - - - 1 - - - Technical documentation. 
- Manual - - - - - - - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - A Master's thesis. - Mastersthesis - - - - - - - - - Use this type when nothing else fits. - Misc - - - - - - - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - A PhD thesis. - Phdthesis - - - - - - - - - - - - 1 - - - - - - 1 - - - The proceedings of a conference. - Proceedings - - - - - - - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - A report published by a school or other institution, usually numbered within a series. - Techreport - - - - - - - - - - - - 1 - - - - - - 1 - - - - - - 1 - - - A document having an author and title, but not formally published. - Unpublished - - - - - - - - - - - - - - event - A specimen collection process. A camera trap image capture. A marine trawl. - An action that occurs at some location during some time. - - Event - - - - - - - - - taxon - A group of organisms (sensu http://purl.obolibrary.org/obo/OBI_0100026) considered by taxonomists to form a homogeneous unit. - taxon - Examples: The genus Truncorotaloides as published by Brönnimann et al. in 1953 in the Journal of Paleontology Vol. 27(6) p. 817-820. - - Taxon - - - - - - - - descriptor - A descriptor (index term, subject term, subject heading) is a term that captures the essence of the topic of a document. - - descriptor - - - - - - - - concept - A concept is term that refers to a generalization of a set of attributes or entities. - - concept - - - - - - - - database - A database is a set of tables. - - database - - - - - - - - database table - A database table is a set of named columns with zero or more rows composed of cells that contain column values and is part of a database. - - database table - - - - - - - - measurement_or_fact - Extended Vocabulary for the description of measurement procedures - 2017-11-14 - Fields included in the traits table, ausrtaits$traits - These data should be linked to the core traitdata by a unique measurementID. 
(ETS) - - MeasurementOrFact - - - - - - - - - austraits$methods - A table containing details on methods with which data were collected, including time frame and source. Cross referencing with the `traits` table is possible using combinations of the variables `dataset_id`, `trait_name`. (AusTraits) - Vocabulary to describe dataset-level properties on authorship and rights - 2017-11-14 - Specifications on terms of use and dataset-level information on authorship and ownership. - - Metadata - - - - - - - - - austraits$taxa - A table containing details on taxa associated with information in `traits`. Whenever possible, this information is sourced from curated taxon lists that include identifiers for each taxon. The information compiled in this table is released under a CC-BY3 license. Cross referencing between the two dataframes is possible using combinations of the variable `taxon_name`. (AusTraits) - - Taxon - - - - - - - - - austraits$traits - A table containing measurements of traits. (AusTraits) - Vocabulary for ecological trait data. (ETS) - The terms in this vocabulary structure facts or measurement values describing a property of an entity (e.g. a single specimen or population) of a specific taxon. - - Traitdata - - - - - - - - - - - - - - - trait dictionary - Vocabulary for lists of trait definitions - 2017-11-14 - Collection of trait definitions and associated vocabularies. The trait concepts (MeasurementTypes, per OBOE) for which there are established descriptions, allowable values, and associated units. (AusTraits) - This should be a standalone dictionary/thesaurus that is used for building a database. This ontology documents the required fields. 
A minimal set of terms for semantic trait ontologies or thesauri of trait definitions (ETS) - - Traitlist - - - - - - - - - - - - - - - - - contact_person - Person with knowledge of how to access, troubleshoot, or otherwise field issues related to the resource - - - Contact Person - - - - - - - - contributor_role - A high-level classification of the diverse roles performed in the work leading to a published research output in the sciences. Its purpose to provide transparency in contributions to scholarly published work, to enable improved systems of attribution, credit, and accountability. - - Contributor Role - - - - - - - - - - - 1 - - - - - - - - - - - data_collectors - Person/institution responsible for finding, gathering/collecting data under the guidelines of the author(s) or Principal Investigator (PI) - - - Data Collector - - - - - - - - - - - - - - - austraits_curators - Person tasked with reviewing, enhancing, cleaning, or standardizing metadata and the associated data submitted for storage, use, and maintenance within a data centre or repository - - - Data Curator - - - - - - - - Identifier scheme used for identifying person, organization, system, physical or digital object. - http://vocab.fairdatacollective.org/gdmt/ - Identifier Scheme - - - - - - - - - - - - - - - assistants - A person without a specifically defined role in the development of the resource, but who is someone the author wishes to recognize. - - - Related Person - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - observations_on_taxon - A hypothetical construct which is all observations made on a single taxon. - - observations_on_taxon - - - - - - - - - austraits$build_info - A description of the computing environment used to create this version of the dataset, including version number, git commit and R session_info. 
- - build info - - - - - - - - - austraits$contexts - Contextual characteristics associated with information in `traits`. - - contexts table - - - - - - - - - - - - - - - - - - - - - - - - measurement_of_location_property - A location characteristic being recorded. The name should include units of measurement, e.g. `MAT (C)`. Ideally we have at least the following variables for each location, `longitude (deg)`, `latitude (deg)`, `description`. - - measurement of location property - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - method_context - Category of context indicating that multiple measurements of the same trait were made of the same individual, population or species-level entity at a single point in time using multiple methods that might have affected the measured trait values. - - method context - - - - - - - - - - - - - - - - - - 1 - - - - A hypothetical construct which is all observations made at a single location. - - - - - - - - - - observation_of_context_property - Specific situations within which trait measurements are made that can help explain the measured values. - - observation of context property - - - - - - - - - - - - - - - observation_of_entity_context_property - Examples include the sex, caste, and age of the entity measured. - Observation where entity is a category of context capturing ancillary information about an individual, population or species-level entity that might affect the trait values measured on this entity. - - observation of entity context property - - - - - - - - - observation_of_method_context_property - Examples are leaf age, sapwood diameter at point of measurement, measurement temperature, and light environment. - Observations on category of context indicating that multiple measurements of the same trait were made of the same individual, population or species-level entity at a single point in time using multiple methods that might have affected the measured trait values. 
- - - observation of method context property - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - observation_of_organism - - observation of organism - - - - - - - - - - - - - - - observation_of_plot_context_property - Examples are treatment blocks or plots or natural variation within a location such as fire intensity or slope position. - Observation where entity is a context identified as having the category 'plot', indicating it is a context reflecting stratified variation within a location that might affect the trait values measured on an individual, population or species-level entity. - - plot - - observation of plot context property - - - - - - - - - - - - - - - observation_of_temporal_context_property - Examples of temporal contexts are `sampling season`; `sampling time of day`; and sequential `observation numbers` without any linked categorical description. - Observation where entity is a category of context indicating repeat observations have been made on an individual, population or species-level entity across time which might affect the entity's measured trait values. - temporal - - observation of temporal context property - - - - - - - - - - - - - - - observation_of_treatment_context_property - Examples include growing temperature, growing CO2 concentrations, and nutrient amendments. - Observation where entity is a context indicating experimental manipulations that might affect the trait values measured on an individual, population or species-level entity. - - observation of treatment context property - - - - - - - - - observations_from_dataset - observations from dataset - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - A hypothetical construct which is all observations made on an individual organism, as identified by a unique combination of dataset_id, source_id, taxon_name, population_id, and individual_id. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - observations_on_population - A hypothetical construct which is all observations made on a single population, as identified by unique combinations of dataset_id, source_id, and population_id. - observations on population - - - - - - - - - - - - - - - - - - - - - - - plot_context - Category of context indicating stratified variation within a location that might affect the trait values measured on an individual, population or species-level entity. - - plot context - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - treatment_context - - treatment context - - - - - - - - - - - - - - - - list - - list - - - - - - - - property - - property - - - - - - - - class - - Class - - - - - - - - datatype - - Datatype - - - - - - - - - The class of OWL classes. - Class - - - - - - - - - The class of property restrictions. - Restriction - - - - - - - - Thing - - - - - - - - concept - - Concept - - - - - - - - concept_scheme - A SKOS concept scheme can be viewed as an aggregation of one or more SKOS concepts. - - Concept Scheme - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Digital Object Identifier; a character string used to uniquely identify an object. A DOI name is divided into two parts, a prefix and a suffix, separated by a slash. - DOI - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Value for an entity falls within specified limits. - - - - - - - - - - - - - - - Value is the maximum of values recorded for an entity. - - - - - - - - - Value is the median of values recorded for an entity. - - - - - - - - - - - - - - - Value is the median of values recorded for an entity. - - - - - - - - - Value is the minimum of values recorded for an entity. - - - - - - - - - Value is the mode of values recorded for an entity. 
- - - - - - - - - - - - - - - Value is a range of values recorded for an entity. - - - - - - - - - Value recorded for an entity. - - - - - - - - - - - - - - - - - - - - - - - 1 - - - 1 - - - 1 - - - - - - - - Specific feature of an organism that can be measured (for numeric traits) or scored (for categorical traits). (AusTraits) - MeasurementType - trait_concept - - A MeasurementType describes the type of a Measurement in which the Measurement would follow the associated Protocol to record the value of the associated Characteristic of the associated Entity using the associated Standard. Any of these associated properties may be omitted, in which case the MeasurementType is only constrained by the provided associations. A MeasurementType is a hypothetical construct, in that it is not associated with a particular instance of a Measurement. - - - observing process - - - submitting process - - - locality description - - - museum collection - - - organismal museum collection - - - taxonomic identification process - - - material target of observation - - - material target of observation role - - - taxonomic inventory process - - - taxonomic inventory - - - restricted search taxonomic inventory process - - - open search taxonomic inventory process - - - opportunistic search taxonomic inventory process - - - trap or sample taxonomic inventory process - - - adventitious taxonomic inventory process - - - compilation taxonomic inventory process - - - taxonomic inventory metadata preferred name - - - obsolete has role - - - museum - - - obsolete fossil specimen - - - obsolete living specimen - - - obsolete preserved specimen - - - obsolete darwin core class - - - human observation process - - - machine observation process - - - obsolete derives from by planned process - - - obsotlete is derived into by planned process - - - specimen collection process X - - - material entity A - - - material entity B - - - material entity C - - - specimen collection process Y - - - incorrect 
identifier format, replaced - - - identification assertion - - - to taxon - - - of organism - - - member of taxon - - - continuant - continuant - - - occurrent - - - independent continuant - - - spatial region - - - disposition - - - realizable entity - - - quality - - - specifically dependent continuant - specifically dependent continuant - - - role - - - object aggregate - - - object - - - generically dependent continuant - - - function - - - material entity - material entity - - - part of - is part of - - - has part - has part - - - realized in - realized in - - - realizes - realizes - - - preceded by - preceded by - - - precedes - precedes - - - occurs in - occurs in - - - contains process - site of - - - immaterial entity - - - organism or virus or viroid - - - deoxyribonucleic acid - deoxyribonucleic acid - - - nucleic acid - nucleic acid - - - ribonucleic acid - ribonucleic acid - - - macromolecule - macromolecule - - - cultured cell - cultured cell - - - B cell - B cell - - - human dwelling - - - catalytic activity - - - RNA-directed DNA polymerase activity - - - kinase activity - - - transferase activity - - - transferase activity, transferring phosphorus-containing groups - - - protein-containing complex - protein-containing complex - - - example to be eventually removed - example to be eventually removed - - - objective specification - objective specification - - - action specification - - - information carrier - information carrier - - - data item - data item - - - information content entity - information content entity - - - directive information entity - - - curation status specification - curation status specification - - - image - image - - - data about an ontology part - data about an ontology part - - - failed exploratory term - failed exploratory term - - - plan specification - plan specification - - - in branch - in branch - - - editor note - editor note - editor note - - - alternative term - alternative term - - - FossilSpecimen - metadata 
complete - metadata complete - - - organizational term - organizational term - - - ready for release - ready for release - - - metadata incomplete - metadata incomplete - - - uncurated - uncurated - - - pending final vetting - pending final vetting - - - is about - - - material information bearer - material information bearer - - - photograph - - - core - - - obsolescence reason specification - obsolescence reason specification - - - placeholder removed - placeholder removed - - - terms merged - terms merged - - - term imported - term imported - - - term split - term split - - - has obsolescence reason - has obsolescence reason - - - term tracker item - term tracker item - - - ontology term requester - - - document - - - denotator type - denotator type - denotator type - - - universal - universal - universal - - - is denotator type - is denotator type - - - defined class - defined class - defined class - - - named class expression - named class expression - named class expression - - - to be replaced with external ontology term - to be replaced with external ontology term - - - expand expression to - expand expression to - - - expand assertion to - expand assertion to - - - first order logic expression - first order logic expression - - - antisymmetric property - antisymmetric property - - - requires discussion - requires discussion - - - OBO foundry unique label - OBO foundry unique label - - - has ID digit count - - - has ID range allocated to - - - has ID policy for - - - has ID prefix - - - elucidation - elucidation - - - has associated axiom(nl) - has associated axiom(nl) - - - has associated axiom(fol) - has associated axiom(fol) - - - is allocated id range - is allocated id range - - - may be identical to - - - scheduled for obsoletion on or after - - - has axiom label - has axiom id - - - ontology module - - - base ontology module - - - editors ontology module - - - main release ontology module - - - bridge ontology module - - - import ontology module - - - 
subset ontology module - - - curation subset ontology module - - - analysis subset ontology module - - - single layer subset ontology module - - - exclusion subset ontology module - - - external import ontology module - - - species subset ontology module - - - reasoned ontology module - - - generated ontology module - - - template generated ontology module - - - taxonomic bridge ontology module - - - ontology module subsetted by expressivity - - - obo basic subset ontology module - - - ontology module subsetted by OWL profile - - - EL++ ontology module - - - Viruses - Viruses - - - Teleostomi - - - Euteleostomi - - - cellular organisms - - - Dipnotetrapodomorpha - - - Boreoeutheria - - - Bacteria - Bacteria - - - Homininae - - - Archaea - Archaea - - - Eukaryota - Eukaryota - - - Euarchontoglires - - - Simiiformes - - - Hominoidea - - - Tetrapoda - - - Amniota - - - Theria <mammals> - - - Opisthokonta - - - Metazoa - - - Bilateria - - - Deuterostomia - - - Haplorrhini - - - Mammalia - - - Eumetazoa - - - Chordata - - - Vertebrata <vertebrates> - - - Gnathostomata <vertebrates> - - - Sarcopterygii - - - Craniata <chordates> - - - Eutheria - - - Primates - - - Catarrhini - - - Hominidae - - - Homo - - - Homo sapiens - - - planned process - planned process - - - processed material - processed material - - - evaluant role - evaluant role - - - assay - assay - - - culture medium - culture medium - - - material processing - material processing - - - specimen role - specimen role - - - imaging assay - imaging assay - - - organization - organization - - - protocol - protocol - - - has_specified_input - has_specified_input - has_specified_input - - - is_specified_input_of - is_specified_input_of - is_specified_input_of - - - has_specified_output - has_specified_output - has_specified_output - - - is_specified_output_of - is_specified_output_of - is_specified_output_of - - - achieves_planned_objective - achieves_planned_objective - - - reverse transcriptase - reverse 
transcriptase - - - assay objective - assay objective - - - measure function - measure function - - - obsolete_the supplier role of Affymetrix - obsolete_the supplier role of Affymetrix - - - material separation objective - material separation objective - - - has grain - has grain - - - specimen collection process - specimen collection process - - - sample from organism - sample from organism - - - portioning objective - portioning objective - - - separation into different composition objective - separation into different composition objective - - - specimen collection objective - specimen collection objective - - - material sample role - material sample role - - - material sampling process - material sampling process - - - material maintenance objective - material maintenance objective - - - primary structure of DNA macromolecule - primary structure of DNA macromolecule - - - measurement device - measurement device - - - objective_achieved_by - objective_achieved_by - - - primary structure of RNA molecule - primary structure of RNA molecule - - - obsolete_the manufacturer role of Agilent - obsolete_the manufacturer role of Agilent - - - obsolete_manufacturer role of Bruker Daltonics - obsolete_manufacturer role of Bruker Daltonics - - - obsolete_the manufacturer role of Thermo - obsolete_the manufacturer role of Thermo - - - obsolete_the manufacturer role of Li-Cor - obsolete_the manufacturer role of Li-Cor - - - obsolete_the manufacturer role of Roche - obsolete_the manufacturer role of Roche - - - obsolete_the manufacturer role of Ambion - obsolete_the manufacturer role of Ambion - - - obsolete_the manufacturer role of BIO-RAD - obsolete_the manufacturer role of BIO-RAD - - - obsolete_the regulator role of the FDA - obsolete_the regulator role of the FDA - - - obsolete_the manufacturer role of Illumina - obsolete_the manufacturer role of Illumina - - - obsolete_the manufacturer role of Helicos - obsolete_the manufacturer role of Helicos - - - 
obsolete_manufacturer role of Bruker Corporation - obsolete_manufacturer role of Bruker Corporation - - - obsolete_the manufacturer role of Waters - obsolete_the manufacturer role of Waters - - - obsolete_manufacturer role of applied biosystems - obsolete_manufacturer role of applied biosystems - - - sequence data - sequence data - - - nucleic acid extract - nucleic acid extract - - - nucleic acid sequencer - nucleic acid sequencer - - - protein sequencer - protein sequencer - - - DNA sequence data - DNA sequence data - - - cell freezing medium - cell freezing medium - - - value specification - value specification - - - has value specification - has value specification - - - collection of specimens - collection of specimens - - - Thermo Fisher Scientific - Thermo Fisher Scientific - - - lymphocyte assay - lymphocyte assay - - - Epstein Barr virus transformed B cell - Epstein Barr virus transformed B cell - - - specimen - specimen - - - cultured cell population - cultured cell population - - - DNA sequencer - DNA sequencer - - - collecting specimen from organism - collecting specimen from organism - - - maintaining cell culture - maintaining cell culture - - - establishing cell culture - 'establishing cell culture' - - - sequencing assay - sequencing assay - - - nucleic acid extraction - nucleic acid extraction - - - collection of organisms - - - household - - - agricultural household - - - organismal entity - - - pair of interacting organisms - - - protein - protein - - - characteristic of - inheres in - inheres in - - - bearer of - bearer of - has characteristic - - - participates in - participates in - - - has participant - has participant - - - is concretized as - - - concretizes - - - function of - - - quality of - - - role of - - - has function - - - has quality - - - has role - - - has disposition - - - disposition of - - - derives from - derives from - - - derives into - - - 2D boundary of - - - has 2D boundary - - - directly regulated by - - - immediately 
preceded by - - - immediately precedes - - - regulates - - - negatively regulates - - - positively regulates - - - capable of - - - capable of part of - - - has input - - - acts upstream of - - - acts upstream of or within - - - causally upstream of, positive effect - - - causally upstream of, negative effect - - - ecologically related to - - - mereotopologically related to - - - enables - - - functionally related to - - - enabled by - - - regulated by - - - negatively regulated by - - - positively regulated by - - - member of - - - has member - - - input of - - - immediately causally downstream of - - - causally related to - - - immediately causally upstream of - - - causally upstream of or within - - - causally downstream of or within - - - involved in regulation of - - - involved in positive regulation of - - - involved in negative regulation of - - - involved in or involved in regulation of - - - is active in - - - interacts with - - - molecularly interacts with - - - phosphorylates - - - directly regulates activity of - - - helper property (not for use in curation) - - - is kinase activity - - - causal agent in process - - - causal relation between processes - - - causal relation between entities - - - causally influenced by - - - interaction relation helper property - - - molecular interaction relation helper property - - - causally influences - - - directly regulates - - - is a defining property chain axiom - - - causal relation between material entity and a process - - - capable of regulating - - - capable of negatively regulating - - - capable of positively regulating - - - process has causal agent - - - acts upstream of or within, positive effect - - - acts upstream of or within, negative effect - - - acts upstream of, positive effect - - - acts upstream of, negative effect - - - causally upstream of or within, negative effect - - - causally upstream of or within, positive effect - - - DEPRECATED inheres in - - - DEPRECATED bearer of - - - regulates 
activity of - - - region - region - - - material anatomical entity - material anatomical entity - - - anatomical cluster - anatomical cluster - - - obsolete evidence role - - - obsolete persistent evidence role - - - obsolete collecting process - - - obsolete observing process - - - obsolete being there process - - - obsolete printed report - - - obolete printed observational report - - - obsolete selecting process - - - obsolete submitting process - - - obsolete physical extraction process - - - obsolete material sampling process - - - obsolete data sampling process - - - obsolete statistical sampling process - - - obsolete locality description - - - obsolete material sample role - - - obsolete successful material sampling process - - - obsolete unsuccessful material sampling process - - - obsolete museum collection entity - - - obsolete organismal museum collection entity - - - obsolete museum collection - - - obsolete organismal museum collection - - - obsolete institution - - - obsolete organismal entity - - - obsolete organism or virus or viroid - - - obsolete material sample - - - obsolete data sample - - - obsolete statistical sample - - - obsolete protocol governed sampling activity - - - obsolete taxonomic identification process - - - obsolete process that yields a material representation of a material entity - - - obsolete material target of observation - - - obsolete process that yields an information artifact that is a representation of a material entity - - - obsolete material target of observation role - - - zooarcheological specimen - - - archeobotanical specimen - - - obsolete incorrect identifier format, do not use - - - obsolete zooarcheological specimen - - - obsolete archeobotanical specimen - - - Creator - Creator - - - Source - Source - - - description - - - - Date of formal issuance (e.g., publication) of the resource. Recommended best practice is to use an encoding scheme, such as ISO 8601:2004(E). 
- - - - Date Modified - date modified - Date on which the resource was changed. - Recommended practice is to describe the date, date/time, or period of time as recommended for the property Date, of which this is a subproperty. - - - This property is intended to be used with non-literal values. This property is an inverse property of Is Referenced By. - A related resource that is referenced, cited, or otherwise pointed to by the described resource. - References - references - - - - Accepted Taxon - - - Accepted Taxon ID - - - Access Constraints - - - Catalog Number Numeric - - - Dataset - - - Darwin Core Type - - - Event Attribute - - - Event Attribute Accuracy - - - Event Attribute Determined By - - - Event Attribute Determined Date - - - Event Attribute ID - - - Event Attribute Remarks - - - Event Attribute Type - - - Event Attribute Unit - - - Event Attribute - - - Event Measurement - - - Fossil Specimen - - - Generalizations - - - Geological Context - - - Higher Taxon - - - Higher Taxon ID - - - Human Observation - - - Identification - - - Living Specimen - - - Machine Observation - - - Material Citation - - - Material Sample - - - Measurement or Fact - - - Occurrence Measurement - - - Organism - - - Preserved Specimen - - - Previous Identifications - - - Related Basis of Record - - - Resource Relationship - - - Sample - - - Sample Attribute - - - Sample Attribute Accuracy - - - Sample Attribute Determined By - - - Sample Attribute Determined Date - - - Sample Attribute Remarks - - - Sample Attribute Unit - - - Sample Attribute Value - - - Sample Remarks - - - Sample Attribute ID - - - Sample Attribute Type - - - Sampling Event - - - Sampling Event Attributes - - - Sampling Event ID - - - Sampling Event Remarks - - - Sampling Location - - - Sampling Location ID - - - Sampling Location Remarks - - - Taxon ID - - - Accepted Scientific Name - - - Accepted Scientific Name ID - - - Accepted Taxon ID - - - Accepted Taxon Name - - - Accepted Taxon Name ID - - - 
According To - - - Accuracy - - - Associated Media - - - Associated Occurrences - - - Associated Organisms - - - Associated References - - - Associated Sequences - - - Associated Taxa - - - Basionym - - - Basionym ID - - - Bed - - - Behavior - - - Binomial - - - Catalog Number - - - Collection Code - - - Collection ID - - - Continent - - - Coordinate Precision - - - Coordinate Uncertainty In Meters - - - Country Code - countryCode - - - County - - - Cultivar Epithet - - - Data Generalizations - - - 2019-03-25 - Dataset Name - datasetName - - - Date Identified - - - Degree of Establishment - - - Disposition - - - Dynamic Properties - - - Earliest Age Or Lowest Stage - - - Earliest Eon Or Lowest Eonothem - - - Earliest Epoch Or Lowest Series - - - Earliest Era Or Lowest Erathem - - - Earliest Period Or Lowest System - - - Event Measurement Accuracy - - - Event Measurement Determined By - - - Event Measurement Determined Date - - - Event Measurement ID - - - Event Measurement Remarks - - - Event Measurement Type - - - Event Measurement Unit - - - Event Measurement Value - - - Footprint SRS - - - Footprint Spatial Fit - - - Footprint WKT - - - Formation - - - Generic Name - - - Geological Context ID - - - Georeference Protocol - - - Georeference Remarks - - - Georeference Sources - - - Georeference Verification Status - - - Georeferenced By - - - Georeferenced Date - - - Group - - - Higher Classification - - - Higher Geography - - - Higher Geography ID - - - Higher Taxon Name - - - Higher Taxon Name ID - - - Higher Taxon Concept ID - - - Highest Biostratigraphic Zone - - - Identification Attributes - - - Identification ID - - - Identification Qualifier - - - Identification References - - - Identification Remarks - - - Identification Verification Status - - - Identified By - - - Identified By ID - - - Individual ID - - - Information Withheld - - - Infrageneric Epithet - - - Institution Code - - - Institution ID - - - Island - - - Island Group - - - Latest AgeOr Highest 
Stage - - - Latest Eon Or Highest Eonothem - - - Latest Epoch Or Highest Series - - - Latest Era Or Highest Erathem - - - Latest Period Or Highest System - - - Lithostratigraphic Terms - - - Locality - - - Location According To - - - Location Attributes - - - Location Remarks - - - Lowest Biostratigraphic Zone - - - Material Sample ID - - - Maximum Depth In Meters - - - Maximum Distance Above Surface In Meters - - - Maximum Elevation In Meters - - - Measurement Accuracy - - - Measurement Determined By - measurementDeterminedBy - - - measurementDeterminedDate - Measurement Determined Date - - - Measurement ID - - - Measurement Type - - - Measurement Unit - - - Member - - - Minimum Depth In Meters - - - Minimum Distance Above Surface In Meters - - - Minimum Elevation In Meters - - - Municipality - - - Name According To - - - Name According To ID - - - Name Publication ID - - - Name Published In - - - Name Published In ID - - - Name Published In Year - - - Nomenclatural Status - - - Occurrence Attributes - - - Occurrence Details - - - Occurrence ID - - - Occurrence Measurement Accuracy - - - Occurrence Measurement Determined By - - - Occurrence Measurement Determined Date - - - Occurrence Measurement ID - - - Occurrence Measurement Remarks - - - Occurrence Measurement Type - - - Occurrence Measurement Unit - - - Occurrence Measurement Value - - - occurrenceRemarks - Occurrence Remarks - 2018-11-01 - - - Occurrence Status - - - 2018-03-21 - order - Order - - - Organism ID - - - Organism Name - - - Organism Quantity - - - Organism Quantity Type - - - Organism Remarks - - - Organism Scope - - - Original Name Usage - - - Original Name Usage ID - - - Other Catalog Numbers - - - Owner Institution Code - - - Parent Name Usage - - - Parent Name Usage ID - - - Pathway - - - phylum - Phylum - - - Point Radius Spatial Fit - - - preparations - Preparations - - - Previous Identifications - - - Record Number - - - Recorded By - - - Recorded By ID - - - Related Resource ID - - - 
Related Resource Type - - - Relationship According To - - - Relationship Established Date - - - Relationship Of Resource - - - Relationship Of Resource ID - - - Relationship Remarks - - - Reproductive Condition - - - Resource ID - - - Resource Relationship ID - - - Scientific Name Rank - - - Specific Epithet - - - State Province - - - Subfamily - - - Subgenus - - - Taxon According To - - - Taxon Attributes - - - Taxon Name ID - - - Taxon Remarks - - - Type Status - - - Verbatim Coordinate System - - - Verbatim Coordinates - - - Verbatim Depth - - - Verbatim Identification - - - Verbatim Latitude - - - Verbatim Locality - verbatimLocality - - - Verbatim Longitude - - - Verbatim SRS - - - Verbatim Scientific Name Rank - - - Verbatim Taxon Rank - - - Vernacular Name - - - Vertical Datum - - - Water Body - - - keyword - - A keyword is a descriptor in which the association of the word with the entity facilitates information retrieval. - - keyword - - - - is property of - is property of is a relation between a quality, capability or role and the entity that it and it alone bears. - is property of - - - - is identifier for - is identifier for - a relation between an identifier and an entity. - - - BEPlotID - 2018-03-21 - - - BEType - 2018-05-29 - - - BiodiversityExploratories - - - Exploratory - 2017-11-01 - - - OriginBE - - - basisOfRecordDescription - - - basisOfRecordDescription - - - dispersion - - - elevation - - - eventDate - - - eventID - - - - recommended - - A comma separated list of terms comprising the constrained vocabulary for categorical traits or ordinal binary traits. - 2017-09-27 - Ordinal traits may be encoded with numerically indexed factor levels; e.g. 
1_egg, 2_larvae, 3_pupae, 4_adult; the field traitDescription should define the factor levels; - allowed_values_levels - 2017-11-15 - factorLevels - - - measurementAccuracy - - - measurementID - - - measurementMethod - - - measurementRemarks - - - measurementResolution - - - measurementValue_max - - - measurementValue_min - - - measurementID - - - morphotype - - - narrowerTerm - 2017-11-15 - - - occurrenceID - - - occurrenceID - - - references - - - relatedTerm - - - 2019-03-25 - scientificNameStd - - - sex - - - source - 2018-09-18 - Can be a DOI or bibliographic reference to the original publication of this trait definition - - recommended - The original source of the trait definition - - - statisticalMethod - - - traitName - 2019-03-25 - - - 2019-03-25 - traitNameStd - - - traitUnit - 2019-03-25 - - - 2019-03-25 - traitValueStd - - - traitName - - - traitNameStd - - - traitUnit - - - traitUnitStd - - - traitValue - - - traitValueStd - - - verbatimTraitName - - - verbatimTraitUnit - - - verbatimTraitValue - - - warnings - 2018-05-29 - - - Is Reviewed By - - indicates that A is reviewed by B - is reviewed by - - - The organizational or institutional affiliation of the contributor. - - has contributor affiliation - has Contributor Affiliation - - - Globally unique string that identifies the contributor (an individual or legal entity). - has contributor identifier - - has Contributor Identifier - - - Obsolete Class - - - context property description - context_property_description - - - - - - The date when a definition within the trait dictionary is revised. - date_trait_definition_modified - date trait definition modified - - - entity_type - - - A categorical variable specifying the entity corresponding to the trait values recorded. - entity type - - - locations$description - - A brief vegetation or location description. - location description - - - - Container - - - ContainerMembershipProperty - - - Resource - - - domain - - - A human-readable name for the subject. 
- label - label - - - - member - - - range - - - seeAlso - - - subClassOf - - - subPropertyOf - - - AllDifferent - - - AllDisjointClasses - - - AllDisjointProperties - - - Annotation - - - AnnotationProperty - - - AsymmetricProperty - - - Axiom - - - DataRange - - - DatatypeProperty - - - DeprecatedClass - - - DeprecatedProperty - - - FunctionalProperty - - - InverseFunctionalProperty - - - IrreflexiveProperty - - - NamedIndividual - - - NegativePropertyAssertion - - - ObjectProperty - - - Ontology - - - OntologyProperty - - - ReflexiveProperty - - - SymmetricProperty - - - TransitiveProperty - - - annotatedProperty - - - annotatedSource - - - annotatedTarget - - - assertionProperty - - - backwardCompatibleWith - - - bottomDataProperty - - - bottomObjectProperty - - - datatypeComplementOf - - - differentFrom - - - distinctMembers - - - equivalentClass - - - equivalentProperty - - - imports - - - intersectionOf - - - inverseOf - - - members - - - oneOf - - - priorVersion - - - propertyChainAxiom - - - propertyDisjointWith - - - - The property that determines that two given individuals are equal. - sameAs - same as - - - sourceIndividual - - - targetIndividual - - - targetValue - - - unionOf - - - versionIRI - - - Ordered Collection - - - - alternative label - alternative label - - - has broader transitive - - has broader transitive - By convention, skos:broaderTransitive is not used to make assertions. Rather, the properties can be used to draw inferences about the transitive closure of the hierarchical relation, which is useful e.g. when implementing a simple query expansion algorithm in a search application. - skos:broaderTransitive is a transitive superproperty of skos:broader. - - - change note - - - definition - - - editorial note - - - example - - - has top concept - - - hidden label - hidden label - - - - history note - - - in scheme - Relates a resource (for example a concept) to a concept scheme in which it is included. 
- is in scheme - A concept may be a member of more than one concept scheme. - - - - - Relates a collection to one of its members. - has member - has member - - - - has member list - - - notation - - - note - This property may be used directly, or as a super-property for more specific note types. - note - - A general note, for any purpose. - - - - preferred label - preferred label - - - scopeNote - scope note - - - This property should not be used directly, but as a super-property for all properties denoting a relationship of meaning between concepts. - is in semantic relation with - - is in semantic relation with - Links a concept to a concept related by meaning. - - - ORCID - https://orcid.org/ - ORCID - An identifier curated by ORCID, Inc. to denote some academic author. - - - - - - - diff --git a/inst/support/austraits.build_schema.yml b/inst/support/austraits.build_schema.yml deleted file mode 100644 index e7982ba77..000000000 --- a/inst/support/austraits.build_schema.yml +++ /dev/null @@ -1,419 +0,0 @@ - -entity_type: - description: &entity_type A categorical variable specifying the entity corresponding to the trait values recorded. - type: categorical - values: - individual: Value comes from a single individual. - population: Value represents a summary statistic from multiple individuals at a single location. - metapopulation: Value represents a summary statistic from individuals of the taxon across multiple locations. - species: Value represents a summary statistic for a species or infraspecific taxon across its range or as estimated by an expert based on their knowledge of the taxon. Data fitting this category include estimates from reference books that represent a taxon's entire range and values for categorical variables obtained from a reference book or identified by an expert. - genus: Value represents a summary statistic or expert score for an entire genus. - family: Value represents a summary statistic or expert score for an entire family. 
- order: Value represents a summary statistic or expert score for an entire order. - -value_type: - description: &value_type A categorical variable describing the statistical nature of the trait value recorded. - type: categorical - values: - raw: Value recorded for an entity. - minimum: Value is the minimum of values recorded for an entity. - mean: Value is the mean of values recorded for an entity. - median: Value is the median of values recorded for an entity. - maximum: Value is the maximum of values recorded for an entity. - mode: Value is the mode of values recorded for an entity. This is the appropriate value type for a categorical trait value. - range: Value is a range of values recorded for an entity. - bin: Value for an entity falls within specified limits. - unknown: Not currently known. - -basis_of_value: - description: &basis_of_value A categorical variable describing how the trait value was obtained. - type: categorical - values: - measurement: Value is the result of a measurement(s) made on a specimen(s). - expert_score: Value has been estimated by an expert based on their knowledge of the entity. - model_derived: Value is derived from a statistical model, for example via gap-filling. - unknown: Not currently known. - -basis_of_record: - description: &basis_of_record A categorical variable specifying from which kind of specimen traits were recorded. - type: categorical - values: - field: Traits were recorded on entities living naturally in the field. - field_experiment: Traits were recorded on entities living under experimentally manipulated conditions in the field. - captive_cultivated: Traits were recorded on entities living in a common garden, arboretum, or botanical or zoological garden. - lab: Traits were recorded on entities growing in a lab, glasshouse or growth chamber. - preserved_specimen: Traits were recorded from specimens preserved in a collection, eg. 
herbarium or museum - literature: Traits were sourced from values reported in the literature, and where the basis of record is not otherwise known. - -#------------------------------------------------------------- -# A key describing the structure of the compiled `AusTraits` dataset - -austraits: - description: The compiled `AusTraits` dataset. - type: list - elements: - traits: - description: A table containing measurements of traits. - type: table - elements: - dataset_id: &dataset_id Primary identifier for each study contributed to AusTraits; most often these are scientific papers, books, or online resources. By default this should be the name of the first author and year of publication, e.g. `Falster_2005`. - taxon_name: &taxon_name Scientific name of the taxon on which traits were sampled, without authorship. When possible, this is the currently accepted (botanical) or valid (zoological) scientific name, but might also be a higher taxonomic level. - observation_id: &observation_id A unique integral identifier for the observation, where an observation is all measurements made on an individual at a single point in time. It is important for joining traits coming from the same `observation_id`. Within each dataset, observation_id's are unique combinations of `taxon_name`, `population_id`, `individual_id`, and `temporal_id`. - trait_name: &trait_name Name of the trait sampled. Allowable values specified in the table `definitions`. - value: &value The measured value of a trait, location property or context property. - unit: &units Units of the sampled trait value after aligning with AusTraits standards. - entity_type: *entity_type - value_type: *value_type - basis_of_value: *basis_of_value - replicates: &replicates Number of replicate measurements that comprise a recorded trait measurement. A numeric value (or range) is ideal and appropriate if the value type is a `mean`, `median`, `min` or `max`. 
For these value types, if replication is unknown the entry should be `unknown`. If the value type is `raw_value` the replicate value should be 1. If the trait is categorical or the value indicates a measurement for an entire species (or other taxon) replicate value should be `.na`. - basis_of_record: *basis_of_record - life_stage: &life_stage A field to indicate the life stage or age class of the entity measured. Standard values are `adult`, `sapling`, `seedling` and `juvenile`. - population_id: &population_id A unique integer identifier for a population, where a population is defined as individuals growing in the same location (location_id /location_name) and plot (plot_id, a context category) and being subjected to the same treatment (treatment_id, a context category). - individual_id: &individual_id A unique integer identifier for an individual, with individuals numbered sequentially within each dataset by taxon by population grouping. Most often each row of data represents an individual, but in some datasets trait data collected on a single individual is presented across multiple rows of data, such as if the same trait is measured using different methods or the same individual is measured repeatedly across time. - temporal_id: &temporal_id A unique integer identifier assigned where repeat observations are made on the same individual (or population, or taxon) across time. The identifier links to specific information in the context table. - source_id: &source_id For datasets that are compilations, an identifier for the original data source. - location_id: &location_id A unique integer identifier for a location, with locations numbered sequentially within a dataset. The identifier links to specific information in the location table. - entity_context_id: &entity_context_id A unique integer identifier indicating specific contextual properties of an individual, possibly including the individual's sex or caste (for social insects). 
- plot_id: &plot_id A unique integer identifier for a plot, where a plot is a distinct collection of organisms within a single geographic location, such as plants growing on different aspects or blocks in an experiment. The identifier links to specific information in the context table. - treatment_id: &treatment_id A unique integer identifier for a treatment, where a treatment is any experimental manipulation to an organism's growing/living conditions. The identifier links to specific information in the context table. - collection_date: &collection_date Date sample was taken, in the format `yyyy-mm-dd`, `yyyy-mm` or `yyyy`, depending on the resolution specified. Alternatively an overall range for the study can be indicated, with the starting and ending sample date separated by a `/`, as in 2010-10/2011-03 - measurement_remarks: &measurement_remarks Brief comments or notes accompanying the trait measurement. - method_id: &method_id A unique integer identifier indicating a trait is measured multiple times on the same entity, with different methods used for each entry. This field is only used if a single trait is measured using multiple methods within the same dataset. The identifier links to specific information in the context table. - original_name: &original_name Name given to taxon in the original data supplied by the authors. - locations: - description: A table containing observations of location/site characteristics associated with information in `traits`. Cross referencing between the two dataframes is possible using combinations of the variables `dataset_id`, `location_name`. - type: table - elements: - dataset_id: *dataset_id - location_id: *location_id - location_name: &location_name location name - location_property: The location characteristic being recorded. The name should include units of measurement, e.g. `MAT (C)`. Ideally we have at least the following variables for each location, `longitude (deg)`, `latitude (deg)`, `description`.
- value: The measured value of a location property. - contexts: - description: A table containing observations of contextual characteristics associated with information in `traits`. Cross referencing between the two dataframes is possible using combinations of the variables `dataset_id`, `link_id`, and `link_vals`. - type: table - elements: - dataset_id: *dataset_id - context_property: The contextual characteristic being recorded. If applicable, name should include units of measurement, e.g. `CO2 concentration (ppm)`. - category: The category of context property, with options being `plot`, `treatment`, `individual_context`, `temporal` and `method`. - value: The measured value of a context property. - description: Description of a specific context property value. - link_id: Variable indicating which identifier column in the traits table contains the specified `link_vals`. - link_vals: Unique integer identifiers that link between identifier columns in the `traits` table and the contextual properties/values in the `contexts` table. - methods: - description: A table containing details on methods with which data were collected, including time frame and source. Cross referencing with the `traits` table is possible using combinations of the variables `dataset_id`, `trait_name`. - type: table - elements: - dataset_id: *dataset_id - trait_name: *trait_name - methods: &methods A textual description of the methods used to collect the trait data. Whenever available, methods are taken near-verbatim from the referenced source. Methods can include descriptions such as 'measured on botanical collections', 'data from the literature', or a detailed description of the field or lab methods used to collect the data. - description: &description A 1-2 sentence description of the purpose of the study. - sampling_strategy: &sampling_strategy A written description of how study locations were selected and how study individuals were selected. 
When available, this information is lifted verbatim from a published manuscript. For preserved specimens, this field ideally indicates which records were 'sampled' to measure a specific trait. - source_primary_key: Citation key for the primary source in `sources`. The key is typically formatted as `Surname_year`. - source_primary_citation: Citation for the primary source. This detail is generated from the primary source in the metadata. - source_secondary_key: Citation key for the secondary source in `sources`. The key is typically formatted as `Surname_year`. - source_secondary_citation: Citations for the secondary source. This detail is generated from the secondary source in the metadata. - source_original_dataset_key: Citation key for the original dataset_id in sources; for compilations. The key is typically formatted as `Surname_year`. - source_original_dataset_citation: Citations for the original dataset_id in sources; for compilations. This detail is generated from the original source in the metadata. - data_collectors: &data_collectors The person (people) leading data collection for this study. - assistants: &assistants Names of additional people who played a more minor role in data collection for the study. - dataset_curators: &austraits_curators Names of AusTraits team member(s) who contacted the data collectors and added the study to the AusTraits repository. - excluded_data: - description: A table of data that did not pass quality tests and so were excluded from the master dataset. The structure is identical to that presented in the `traits` table, only with an extra column called `error` indicating why the record was excluded. Common reasons are missing_unit_conversions, missing_value, and unsupported_trait_value. - type: table - elements: - error: Indicating why the record was excluded. Common reasons are missing_unit_conversions, missing_value, and unsupported_trait_value.
- dataset_id: *dataset_id - taxon_name: *taxon_name - observation_id: *observation_id - trait_name: *trait_name - value: The measured value of a trait. - unit: *units - entity_type: *entity_type - value_type: *value_type - basis_of_value: *basis_of_value - replicates: *replicates - basis_of_record: *basis_of_record - life_stage: *life_stage - population_id: *population_id - individual_id: *individual_id - temporal_id: *temporal_id - source_id: *source_id - location_id: *location_id - entity_context_id: *entity_context_id - plot_id: *plot_id - treatment_id: *treatment_id - collection_date: *collection_date - measurement_remarks: *measurement_remarks - method_id: *method_id - original_name: *original_name - taxonomic_updates: - description: A table of all taxonomic changes implemented in the construction of AusTraits. Changes are determined by comparing the originally submitted taxon name against the taxonomic names listed in the taxonomic reference files, best placed in a subfolder in the `config` folder. Cross referencing with the `traits` table is possible using combinations of the variables `dataset_id` and `taxon_name`. - type: table - elements: - dataset_id: *dataset_id - original_name: *original_name - aligned_name: The taxon name without authorship after implementing automated syntax standardisation and spelling changes as well as manually encoded syntax alignments for this taxon in the metadata file for the corresponding `dataset_id`. This name has not yet been matched to the currently accepted (botanical) or valid (zoological) taxon name in cases where there are taxonomic synonyms, isonyms, orthographic variants, etc. - taxonomic_resolution: &taxonomic_resolution The rank of the most specific taxon name (or scientific name) to which a submitted original name resolves. - aligned_scientific_name_id: An identifier for the cleaned name before it is updated to the currently accepted name usage.
This may be a global unique identifier or an identifier specific to the data set. Must be resolvable within this dataset. - aligned_name_taxonomic_status: The status of the use of the `aligned_name` as a label for a taxon. Requires taxonomic opinion to define the scope of a taxon. Rules of priority then are used to define the taxonomic status of the nomenclature contained in that scope, combined with the experts opinion. It must be linked to a specific taxonomic reference that defines the concept. - aligned_name_alternative_taxonomic_status: The taxonomic status of alternative taxonomic records with `aligned_name` as the accepted (botanical) or valid (zoological) taxon name. - taxon_id: &taxon_id An identifier for the set of taxon information (data associated with the taxon class). May be a global unique identifier or an identifier specific to the data set. Must be resolvable within this dataset. - taxon_name: *taxon_name - taxa: - description: A table containing details on taxa associated with information in `traits`. Whenever possible, this information is sourced from curated taxon lists that include identifiers for each taxon. The information compiled in this table is released under a CC-BY3 license. Cross referencing between the two dataframes is possible using combinations of the variable `taxon_name`. - type: table - elements: - taxon_name: *taxon_name - taxonomic_dataset: Name of the taxonomy (tree) that contains this concept. ie. APC, AusMoss etc. - taxon_rank: The taxonomic rank of the most specific name in the scientific name. - trinomial: The infraspecific taxon name match for an original name. This column is assigned `na` for taxon name that are at a broader taxonomic_resolution. - binomial: The species-level taxon name match for an original name. This column is assigned `na` for taxon name that are at a broader taxonomic_resolution. - genus: Genus of the taxon without authorship. - family: Family of the taxon. 
- taxon_distribution: Known distribution of the taxon, by Australian state. - establishment_means: Statement about whether an organism or organisms have been introduced to a given place and time through the direct or indirect activity of modern humans. - taxonomic_status: The status of the use of the scientificName as a label for the taxon in regard to the 'accepted (or valid) taxonomy'. The assigned taxonomic status must be linked to a specific taxonomic reference that defines the concept. - scientific_name: The full scientific name, with authorship and date information if known. - scientific_name_authorship: The authorship information for the scientific name formatted according to the conventions of the applicable. - taxon_id: *taxon_id - scientific_name_id: An identifier for the set of taxon information (data associated with the taxon class). May be a global unique identifier or an identifier specific to the data set. Must be resolvable within this dataset. - contributors: - description: A table of people contributing to each study. - type: table - elements: - dataset_id: *dataset_id - last_name: Last name of the data collector. - given_name: Given names of the data collector. - ORCID: ORCID of the data collector. - affiliation: Last known institution or affiliation. - additional_role: Additional roles of data collector, mostly contact person. - sources: - description: Bibtex entries for all primary and secondary sources in the compilation. - type: list - definitions: - description: A copy of the definitions for all tables and terms. Information included here was used to process data and generate any documentation for the study. - type: categorical - value: A structured yaml file, represented as a list in R. See file `config/traits.yaml` for more details. - schema: - description: A copy of the schema for all tables and terms. Information included here was used to process data and generate any documentation for the study. 
- type: categorical - value: A structured yaml file, represented as a list in R. - metadata: - description: Metadata associated with the dataset, including title, creators, license, subject, funding sources. - type: list - elements: - title: A name or title by which the dataset is known. - description: A brief description of the resource. - version: Version number of the dataset. - doi: A unique doi string that identifies a released version of the resource. - structure_URI: The URI of the repository with the source code that compiles the database. - geo_location: - description: Spatial region or named place where the data was gathered or about which the data is focused. - type: table - elements: - geo_location_place: The broadest geographic location that encompasses the scope of the dataset. - geodetic_datum: The global datum reference or reference frame in which geospatial information is reported in the dataset. Best practice is to use codes from the EPSG Geodetic Parameter Dataset. - language: The primary language of the resource. - related_identifiers: - description: Identifiers of related resources. - type: table - elements: - related_identifier_type: The type of the related_identifier. - identifier: The related identifier. - relation: Description of the relationship of the related resource to the primary resource. - resource_type: The general type of a resource (database), using controlled values from DataCite, https://support.datacite.org/docs/schema-40. - references: Resource that this resource references. - publisher: The name of the entity that holds, archives, publishes, prints, distributes, releases, issues, or produces the resource. - publication_date: The date when this resource was or will be made publicly available. - publication_year: The year when this resource was or will be made publicly available. - license: - description: License applying to the resource. - type: table - elements: - rights: Any rights information for this resource.
- rights_holder: Person or institution owning or managing property rights, including intellectual property rights over the resource. - rights_URI: The URI of the license. - description: A verbal description of the license - subject: - description: Subject, keyword, classification code, or key phrase describing the resource. - type: table - elements: - subject_scheme: The name of the subject scheme or classification code or authority if one is used. - scheme_URI: The URI of the subject identifier scheme. - values_URI: The URI of the subject term. - funding_reference: - description: Information about financial support (funding) for the resource being registered. - type: table - elements: - funder_name: Name of the funding provider. - award_IRI: The URI leading to a page provided by the funder for more information about the award (grant). - award_number: The funding provider's number of the award (grant). - award_title: The title of the award (grant). - creators: Dataset creators. - contributors: Dataset contributors, reflecting names included in the contributors table. - build_info: - description: A description of the computing environment used to create this version of the dataset, including version number, git commit and R session_info. - type: list - elements: - version: - description: Version number of the dataset. - type: categorical - value: - git_SHA: Commit in git repository. - session_info: - description: The versions of R, packages, and data used to generate the dataset. - type: categorical - value: A structured yaml file, represented as a list in R. - -#------------------------------------------------------------- - -metadata: - description: &metadata Structured recording of metadata for each individual study, as entered into AusTraits. Information included in `locations` and `methods` are derived from this file. Also includes information on mapping of trait data into standard terms and units, plus any taxonomic changes implemented. 
- type: list - elements: - source: - description: Citation details for the original source(s) for the data, whether it is a published journal article, book, website, or thesis. - type: categorical - values: - primary: - description: &source_primary The original study in which data were collected. - type: categorical - values: - key: &key The key is used to identify the exact reference using the author's last name and year of publication. - bibtype: &bibtype Entry type for reference source e.g. Article, Book, Thesis, Unpublished. - year: &year The year that the reference was published, or written in the case of unpublished articles. - author: &author Names of all the authors for the reference. - title: &title The title of the reference. - journal: &journal Journal in which the article was published. - volume: &volume The volume number of the article or book. - number: &number The issue number for a journal article. - pages: &pages The page numbers for a reference. - doi: &doi The digital object identifier. - url: &url The URL of a web page. - type: &type The type of thesis which can include PhD, Masters, Honours. - institution: &institution The institution that published or sponsored the report or thesis. - publisher: &publisher The name of the publisher. - isbn: &isbn The International Standard Book Number of a book or report. - place: &place The location where the reference was written or published. - note: &note Additional notes for the reference which are not contained in the other fields. - secondary: - description: &source_secondary A subsequent study where data were compiled or re-analysed.
- type: categorical - values: - key: *key - bibtype: *bibtype - year: *year - author: *author - journal: *journal - title: *title - volume: *volume - number: *number - pages: *pages - doi: *doi - url: *url - type: *type - institution: *institution - publisher: *publisher - isbn: *isbn - place: *place - note: *note - contributors: - description: A list of contributors to the study, their respective affiliations, roles in the study, and ORCIDs. - type: array - elements: - data_collectors: - description: *data_collectors - type: categorical - elements: - last_name: Last name of data collector. - given_name: Given name of data collector. - affiliation: Affiliation of data collector. - ORCID: ORCID ID (Open Researcher and Contributor ID) for the data collector, if available. - notes: optional notes for the data collector. - additional_role: Any additional roles the data collector had in the study, a field most frequently used to identify which data contributor is the contact person for the dataset. - assistants: - description: *assistants - dataset_curators: - description: *austraits_curators - dataset: - description: Study details, including format of the data, custom R code applied to data, and various descriptors. The value entered for each element can be either a header for a column within the data.csv file or the actual value to be used. - type: categorical - values: - data_is_long_format: Indicates if the data spreadsheet has a vertical (long) or horizontal (wide) configuration with `yes` or `no` terminology. - custom_R_code: A field where additional R code can be included. This allows for custom manipulation of the data in the submitted spreadsheet into a different format for easy integration with AusTraits. `.na` indicates no custom R code was used. 
- collection_date: *collection_date - taxon_name: *taxon_name - location_name: *location_name - source_id: *source_id - entity_type: *entity_type - plot_id: *plot_id - treatment_id: *treatment_id - individual_id: *individual_id - observation_id: *observation_id - trait_name: Element required for long datasets to specify the column indicating the trait name associated with each row of data. - value: The measured value of a trait. - description: *description - basis_of_record: *basis_of_record - life_stage: *life_stage - replicates: *replicates - sampling_strategy: *sampling_strategy - measurement_remarks: *measurement_remarks - original_file: The name of the file initially submitted to AusTraits. - notes: Generic notes about the study and processing of data. - locations: - description: A list of study locations (sites) and information about each of the study locations where data were collected. Each should include at least three variables - `latitude (deg)`, `longitude (deg)` and `description`. Additional variables can be included where available. Set to `.na` for botanical collections and field studies where data values are a mean across many locations. - type: array - elements: - location_name: The location name used by the data contributor or a generic location name designated by AusTraits when one is not provided. - description: A brief vegetation or location description. - latitude (deg): Location latitude, in decimal degrees. - longitude (deg): Location longitude, in decimal degrees. - locality: An identifiable place name (optional field). - contexts: - description: Contextual characteristics associated with information in `traits`. - type: array - elements: - context_property: The context property represented by the data in the column specified by `var_in`. - category: The category of contextual data.
Options are `plot` (a distinct collection of organisms within a single geographic location, such as plants growing on different aspects or blocks in an experiment), `treatment` (an experimental treatment), `entity_context` (contextual information to record about the entity that isn't documented elsewhere, including the entity's sex, caste), `temporal` (indicating when repeat observations are made on the same individual (or population, or taxon) across time) and `method` (indicating the same trait was measured on the same individual (or population, or taxon) using multiple methods). - var_in: Name of column with contextual data in the original data submitted. - find: The contextual values in the original data submitted (optional). - value: The standardised contextual values, aligning syntax and wording with other studies. - description: A description of the contextual values. - traits: - description: A translation table, mapping traits and units from a contributed study onto corresponding variables in AusTraits. The methods used to collect the data are also specified here. - type: array - elements: - var_in: Name of trait in the original data submitted. - unit_in: Units of trait in the original data submitted. - trait_name: *trait_name - entity_type: *entity_type - value_type: *value_type - basis_of_record: *basis_of_record - basis_of_value: *basis_of_value - replicates: *replicates - measurement_remarks: *measurement_remarks - methods: *methods - life_stage: *life_stage - substitutions: - description: A list of any "find and replace" substitutions needed to get the data into the right format. - type: array - values: - trait_name: Trait where substitutions are required. - find: Contributor's trait value that needs to be changed. - replace: AusTraits supported replacement value. - taxonomic_updates: - description: A table of taxonomic name changes needed to align original names in the dataset with taxon names in the chosen taxonomic reference(s).
- type: array - values: - find: *original_name - replace: *taxon_name - reason: Records why the change was implemented, e.g. `typos`, `taxonomic synonyms`, and `standardising spellings` - exclude_observations: - description: A table of observations to remove from the compilation. - type: array - values: - variable: A variable from the traits table, typically `taxon_name` or `location_name` - find: Value of variable to remove. - reason: Records why the data was removed, e.g. `exotic` - questions: A place to record any queries we have about the dataset (recorded as a named array), including notes on any additional traits that may have been collected in the study but have not been incorporated into AusTraits. diff --git a/inst/support/report_dataset.Rmd b/inst/support/report_dataset.Rmd deleted file mode 100644 index 9fcfd11c7..000000000 --- a/inst/support/report_dataset.Rmd +++ /dev/null @@ -1,809 +0,0 @@ ---- -title: Report on dataset from `AusTraits` data compilation -output: - html_document: - df_print: kable - highlight: tango - keep_md: no - smart: no - theme: yeti - toc: yes - toc_depth: 3 - toc_float: - collapsed: false - smooth_scroll: true -params: - dataset_id: provide - austraits: provide -editor_options: - chunk_output_type: console ---- - - - - - -```{r setup, echo=FALSE, message=FALSE, warning=FALSE, results="hide"} -# knitr defaults -knitr::opts_chunk$set(echo=FALSE, cache=FALSE) - -# default for table format -options(knitr.table.format = "html") - -# remove warnings from dplyr about "summarise" -options(dplyr.summarise.inform = FALSE) - -# Guidelines for writing report code -# - use tidyverse style and format: http://htmlpreview.github.io/?https://github.com/nicercode/2018_BEES_regression/blob/master/tidyverse.html -# - use kableExtra for styling: https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html -# - use knitr chunck options: https://rmarkdown.rstudio.com/lesson-3.html - -# Some useful functions - - -#' Generate 
hyperlink for markdown and html -#' -#' Generate hyperlink for markdown and html files -#' -#' @param link character string for the url link -#' @param text character string for the text to display -#' @param type file type, default is markdown "md" otherwise html -#' -#' @return character string with the text and link formatted for md and html -#' @export -#' -#' @examples as_link("www.austraits.org", "austraits") -as_link <- function(link, text, type = "md") { - if (type == "md") { - sprintf("[%s](%s)", text, link) - } else { - # any `type` other than "md" yields an HTML anchor; the format must consume both `link` and `text` - sprintf("<a href='%s'> %s </a>", link, text) - } -} - - -#' Get SHA link from Github -#' -#' Get SHA link using the util_get_SHA() function. The link generated leads to the latest -#' commit for the Github repository. SHA is the abbreviated SHA-1 40 digit -#' hexadecimal number which Github uses to track commits and changes made to a repository. -#' -#' @param ... arguments passed to the util_get_SHA() -#' -#' @return SHA link to a github commit as a character string formatted using markdown syntax -util_get_SHA_link <- function(...) { - sha <- util_get_SHA(...)
- as_link(sprintf("https://github.com/traitecoevo/austraits/tree/%s", sha), sha) -} - -``` - -```{r, echo=FALSE, message=FALSE, warning=FALSE, results="hide"} -library(traits.build) -library(austraits) -library(knitr) -library(kableExtra) - - -my_kable_styling <- util_kable_styling_html - -definitions <- austraits$definitions -data_study <- extract_dataset(austraits, dataset_id) -metadata <- read_metadata(file.path("data", dataset_id, "metadata.yml")) -schema <- austraits$schema - -# start notetaker device -questions <- traits.build:::notetaker_start() - -# Add `txt` to the notetaker and write it to the report as a "**Question:**" line. -# NOTE(review): presumably the notetaker numbers and anchors each note - confirm against traits.build. -new_question <- function(txt) { - x <- questions( traits.build:::notetaker_add_note, traits.build:::notetaker_as_note(txt)) %>% traits.build:::notetaker_print_notes(as_anchor=TRUE) - # `collapse` joins a multi-element note into one string before it is formatted - writeLines(sprintf("**Question:** %s\n\n", paste0(x, collapse=""))) -} - -# Variable to record when definitions have been specified - -structures_already_defined <- c("names") - - -``` - -# Introduction - -## About AusTraits - -`AusTraits` is an open-source compilation of trait data for Australian plant taxa. The traits we are considering are any morphological, functional or physiological variable that: 1. Tend to vary widely among taxa; and 2. Have been collected across a moderate number of taxa. Many people collect trait data, and as such, data is spread across many sources, under a variety of formats and terms. - -AusTraits combines all of these data fragments into a harmonised, error checked, publicly available compilation. - -AusTraits is built from many different sources, here referred to as `studies` (so called because the sources are most-often individual scientific papers), with each study denoted by a distinct `dataset_id`. Our data processing pipeline seeks to combine all of the different studies in a transparent and reproducible way. In addition, the AusTraits repository consists of configuration files defining the list of known taxa, the definitions for each trait and table, and appropriate unit conversions.
Detailed descriptions of the `definitions`, `database structure`, and `file format` are all available on the [project homepage](http://traitecoevo.github.io/austraits.build/) under the `Details` tab. - -## Purpose of this report - -The AusTraits compilation includes data from a study with dataset_id **` `r dataset_id` `**. This document describes the information we have on that study. -This report gives both you, as a data contributor, and us, as AusTraits compilers, the opportunity to: - -1. Verify that our understanding and handling of your data is correct -2. Provide any missing information about the data - -Data from each study in AusTraits is organised into two files, within its own folder: - -- ` `r dataset_id`/data.csv`: contains the primary trait measurements, in either long or wide format -- ` `r dataset_id`/metadata.yml`: contains all the contextual data (or metadata) about the trait measurements. - -Data from those files has been used to generate the report below. - -Please review the report and the original files, check they are correct, and send updates where appropriate. Please also answer the queries in the report below, appearing in orange. These appear again as a list at the end of the document. - -We would welcome any of the following: - -- Answers to the list of questions -- Expanding or correcting the information in the `metadata.yml` -- Updates to the data - -To aid your review, we have attached the following files: - -- `r dataset_id`/data.csv` and ` `r dataset_id`/metadata.yml`: The raw inputs for your study. - -### Send feedback by - -1. **Copying and pasting the questions at the end of the report into an email and answering as many as possible.** - - The majority of the questions are automatically generated prompts to look at the pertinent metadata or data plot (or table) and confirm it matches your expectations - for these simply answer "yes". 
There are often a few questions targeted to your study, written by an AusTraits data curator as they merged in your study. Please consider these carefully, as they usually indicate missing metadata or some source of ambiguity (e.g. units not provided, uncertainty about trait value substitutions). -2. If appropriate, send a new spreadsheet to replace the existing `data.csv` file. -3. You may edit the `metadata.yml` file associated with your study yourself, but most contributors prefer to answer the flagged questions and have an AusTraits data curator make the changes to the file. -4. Sending back your responses and any edited files. - - -```{r, results='asis', echo=FALSE} -# Any additional questions from metadata - -# `&&` short-circuits, so a missing (NULL) `questions` entry skips the block instead of producing a zero-length condition -if(!is.null(metadata$questions) && !is.na(metadata$questions[1])) { - - # names of the questions to print, leaving out the `additional_traits` and `austraits` entries - x <- names(metadata$questions)[ !(names(metadata$questions) %in% c( - "additional_traits", "austraits") - ) ] - - if(length(x) > 0) - writeLines("# Major questions\n\n") - - # look each question up by name: `x` is a filtered subset, so its positions do not match `metadata$questions` - for(i in seq_along(x)) - new_question(sprintf("(section `general`) %s", metadata$questions[[x[i]]])) -} -``` - - -# Study details - -We collect information about the context of the trait measurements. This includes details on the people involved, the sources, and details about type of collection, including location and time of collections. - -## Contributors - -The following people are listed as data collectors in this study: - -```{r, results='asis', echo=FALSE} -metadata$contributors$data_collectors %>% - util_list_to_df2() %>% - my_kable_styling() -``` - -If a current affiliation isn't known, provide the last known institution. We also require an email address for all people. People involved in the study who cannot be tracked down, need to be designated as assistants.
-In addition, the following people are included as - -- assistants: `r ifelse(is.null(metadata$contributors$assistants), "none", paste(metadata$contributors$assistants, collapse=", "))` -- data curator(s): `r metadata$contributors$austraits_curators` - -```{r, results='asis', echo=FALSE} -new_question("(section `people`) Are all appropriate people listed, with appropriate details?") -``` - -## Sources - -Data from this study is recorded as coming from the following sources: - -Primary citation: - -```{r, results='asis', echo=FALSE} -metadata$source$primary %>% - util_list_to_df1() %>% - my_kable_styling() -``` - -```{r, results='asis', echo=FALSE} -n <- length(metadata$source) -if(n > 1) { - writeLines("Secondary citations:") - - for(i in 2:n) { - metadata$source[[i]] %>% - util_list_to_df1() %>% - my_kable_styling() %>% - cat() - writeLines("\n\n") - } -} -``` - - -```{r, results='asis', echo=FALSE} -new_question("(section `source`) Are the citation details for this study correct?") -``` - -## General description - -The description we have of the study is as follows: - -```{r, results='asis', echo=FALSE} -metadata$dataset %>% - util_list_to_df1() %>% - my_kable_styling() -``` - -```{r, results='asis', echo=FALSE} -new_question("(section `dataset`) Can you provide more detailed information for any of these variables?") -``` - -```{r, results='asis', echo=FALSE} -missing <- metadata$dataset %>% - util_list_to_df1() %>% - filter(value == "unknown") %>% - pull(key) - -for(v in missing) { - sprintf("(section `dataset`) Can you provide missing details for the variable `%s`?\n", v) %>% - new_question() - sprintf("The variable `%s` captures *'%s'*\n", v, schema$metadata$elements$dataset$values[[v]]) %>% - writeLines() -} -``` - - -## Locations - -Data were collected at the following sites and locations. 
As a minimum, we are aiming to collect, the variables - -```{r, results='asis', echo=FALSE, warning=FALSE} - -create_if_missing <- function(df, var, type=rep(NA_character_, nrow(df))) { - if(is.null(df[[var]])) - df[[var]] <- type - df -} - -location_main <- c("location_name", "longitude (deg)","latitude (deg)", "description") -locations <- - full_join(by="location_id", - # count of records by location - data_study$traits %>% - mutate(location_id = ifelse(is.na(location_id), "unknown", location_id)) %>% - group_by(location_id) %>% - summarise(records = n()), - # details on locations from metadata - data_study$locations %>% - filter(location_property %in% location_main) %>% - select(-dataset_id) %>% - spread(location_property, value) %>% - create_if_missing("location_id") %>% - create_if_missing("location_name") %>% - create_if_missing("latitude (deg)") %>% - create_if_missing("longitude (deg)") %>% - create_if_missing("description") %>% - mutate( - `latitude (deg)` = as.numeric(`latitude (deg)`), - `longitude (deg)` = as.numeric(`longitude (deg)`) - ) - ) %>% - select(location_id, location_name, records, `latitude (deg)`, `longitude (deg)`, description) - - locations %>% - my_kable_styling() - - -if( any(tolower(locations$location_name) %in% c(NA, "unknown") ) ) { - new_question("(section `locations`) **Location data incomplete or unknown!** Can you provide location details where your data were sampled? If data are from a National Park, biological reserve, or other location with a name, but not linked to a specific site within the park, the site name should be that national park (or similar), and we will use the park's headquarters as the GPS coordinates. 
If the data are from herbarium specimens or a taxa monograph it is appropriate to leave this information blank.") -} - -``` - -```{r maps, eval= TRUE, echo=FALSE, warning=FALSE, message=FALSE, fig.keep="all"} - -# Make a map of study locations using leaflet -locations <- locations %>% filter(!is.na(`latitude (deg)`) & !is.na(`longitude (deg)`)) -if(nrow(locations) > 0 ) { - leaflet::leaflet() %>% - leaflet::addTiles() %>% - leaflet::addScaleBar() %>% - leaflet::addMiniMap(zoomLevelFixed=2) %>% - leaflet::addMarkers(lng = locations$`longitude (deg)`, lat = locations$`latitude (deg)`, - label = locations$location_name) -} -``` - -```{r, results='asis', echo=FALSE} -new_question("(section `locations`) Do site details look complete and accurate? As a minimum we would like `latitude`, `longitude`, `description`.") -``` - -## Additional location data (optional) - -The following variables have been recorded for the locations included in the study: - -```{r, results='asis', echo=FALSE} - -data_study$locations %>% - filter(!(location_property %in% location_main)) %>% - select(-dataset_id) %>% - spread(location_property, value) -> locations_extra - -if(nrow(locations_extra) > 0 ) { - locations_extra %>% - my_kable_styling() %>% - writeLines() - - new_question("(section `locations`) Do the additional site details look complete?") - } else{ - writeLines("There is currently no additional site data associated with this dataset.") -} - -``` - -## Contexts - - -The following contexts have been recorded for the measurements included in this study. 
- -```{r, results='asis', echo=FALSE} -data_study$contexts %>% - select(-dataset_id) %>% - group_by(context_property) %>% - group_split() -> context_properties - -if(length(context_properties) > 0 ) { - for (i in seq_along(context_properties)) { - var_in <- context_properties[[i]][["var_in"]][1] - - writeLines(paste0("**", context_properties[[i]]$context_property[1], "**:", ifelse(is.null(var_in), "Added ", - paste0("Mapped from the column titled **", var_in, "** in your dataset ")), "with the category *", context_properties[[i]]$category[1], "*.")) - - writeLines("\n\nThe following values exist for this context:\n") - - context_properties[[i]] %>% dplyr::select(dplyr::any_of(c("find", "value", "description"))) %>% - my_kable_styling() %>% - writeLines() - } - new_question("(section `contexts`) Do the context details look complete?") - } else { - writeLines("There is currently no contextual data associated with this dataset.") - } - -``` - -## Taxa sampled - -We have records on `r data_study$traits$taxon_name %>% unique() %>% length()` taxa from your study. We have attempted to align taxon names with [the Australian Plant Census (APC)](https://biodiversity.org.au/nsl/services/APC) -- a global working list of all known Australian vascular plant taxa. The full list of names for taxa recorded in your study are listed below. 
- -# Trait measurements - -## Overview - -The dataset includes `r data_study$traits %>% nrow()` individual data points from `r data_study$traits$taxon_name %>% unique() %>% length()` taxa, with data included for the following `r data_study$traits$trait_name %>% unique() %>% length()` traits: - -```{r summary_table, results='asis', echo=FALSE} -n_records <- data_study$traits %>% - group_by(trait_name) %>% - summarise( - taxa = length(unique(taxon_name)), - records = length(value)) %>% - mutate(records_per_taxa = round(records / taxa, digits=2) ) - -n_records %>% - my_kable_styling() -``` - - -```{r, results='asis', echo=FALSE} -new_question("(section `traits`) Does this study include other trait data we may have missed?") - -if(median(n_records$records_per_taxa) < 2) { - new_question("(section `traits`) It appears as though we mostly have means for each taxa. Can you provide individual-level measurements?") -} - -new_question("(section `traits`) Were any of your data sourced from other studies? If so, can you tell us which records and the source (so that we can avoid duplicates, where possible)?") - -``` - -```{r excluded, results='asis', echo=FALSE, warning = FALSE} - -writeLines(ifelse(nrow(subset(data_study$excluded_data, error != "Missing value")) == 0, - "All data passed quality control so nothing was excluded in this study. (Yay!)", - "We excluded some records in your dataset as they did not pass the quality control. 
A summary of the number of points and reasons for exclusion are as follows:")) - -if(nrow(subset(data_study$excluded_data,error!="Missing value")) > 0 ) { - data_study$excluded_data %>% - filter(error!="Missing value") %>% - group_by(trait_name, error) %>% - summarise(`points excluded` = n()) %>% - ungroup() %>% - my_kable_styling() %>% - writeLines() - - writeLines(ifelse(nrow(subset(data_study$excluded_data, error == "Unsupported trait value")) > 0, - "In particular, the following data points were excluded because the supplied trait value could not be aligned with the allowable values in AusTraits (error = `Unsupported trait value`). The allowable values for each trait are listed in the [definitions file](http://traitecoevo.github.io/austraits.build/articles/trait_definitions.html) and lower in this report.","")) - - - if(nrow(subset(data_study$excluded_data,error == "Unsupported trait value")) > 0 ) { - data_study$excluded_data %>% - filter(error=="Unsupported trait value") %>% - select(trait_name,value) %>% - rename(`value that is unaligned` = value) %>% - distinct() %>% - arrange(trait_name, `value that is unaligned`) %>% - my_kable_styling() %>% - writeLines() - } - - writeLines(ifelse(nrow(subset(data_study$excluded_data, error == "Observation excluded in metadata")) > 0 , - "The following observations were explicitly excluded (error = `Observation excluded in metadata`). 
This decision was most likely made by the data curators because the study included non-native, not naturalised taxa or taxa that aren't vascular plants.","")) -" " - - if(nrow(subset(data_study$excluded_data,error == "Observation excluded in metadata")) > 0) { - metadata$exclude_observations %>% - util_list_to_df2() %>% - my_kable_styling() %>% - writeLines() - } - - writeLines("Summaries of excluded numeric observations are included later in the report.") - - new_question("(section `traits`) Can you provide any additional information so that above exclusions (required substitutions or other errors) no longer apply?") -} -``` - -```{r, echo=FALSE, message=FALSE} -tmp <- data_study$traits %>% filter(!trait_name %in% c("flowering_time", "fruiting_time", "recruitment_time")) -traits <- tmp$trait_name %>% unique() %>% sort() -traits_numeric <- traits[util_extract_list_element(traits, definitions, "type") == "numeric"] -traits_categorical <- traits[util_extract_list_element(traits, definitions, "type") != "numeric"] -``` - -## Numerical traits - -```{r numerical, results='asis', echo=FALSE, message=FALSE, warning=FALSE, fig.height=10, fig.width=10, eval=TRUE} -question_x <- function(x, y = trait) { - sprintf("(section `traits`) Can you provide missing details `%s` for trait `%s`?\n", x, y) %>% - new_question() -} - -# Write the schema description of `var` to the report; for `value_type` also write the table of possible values -define_variable <- function(var, schema) { - - description <- switch(var, - value_type = schema$value_type$description, - replicates = schema$austraits$elements$traits$elements$replicates, - methods = schema$austraits$elements$methods$elements$methods - ) - - # use the `var` argument, not a global loop variable that happens to share the caller's name - sprintf("The variable `%s` is *'%s'*.", var, description) %>% - writeLines() - - if(var == "value_type") { - writeLines("Possible values are:") - - schema$value_type$values %>% - util_list_to_df1() %>% - my_kable_styling() %>% - writeLines() - } -} - - - -writeLines(ifelse(length(traits_numeric) == 0, - "No numerical traits data are currently available in this dataset", - "We have recorded the following
traits with continuous numerical values in this dataset. Plots are shown comparing the distribution of values in your study (blue) to those in other datasets within AusTraits (red, green). For each trait, please eyeball the information provided and let us know of any issues or updates where appropriate.")) - -for(trait in traits_numeric) { - - - data_trait_study <- data_study$traits %>% - filter(trait_name == trait) %>% - mutate(value = as.numeric(value)) - - data_trait_all <- austraits$traits %>% - filter(trait_name == trait) %>% - mutate(value = as.numeric(value)) - - transforms <- metadata$traits %>% util_list_to_df2() %>% filter(trait_name == trait) - - elements <- definitions[[trait]] - - units <- elements$units - - c("", "", - sprintf("### %s", trait), - "", - sprintf("We aligned the contributed variable called `%s` in the data you supplied with the trait `%s` in the AusTraits database, with value_types `%s`.\n\nThe original variable was supplied with units `%s`; these were converted to our standard `%s`.\n\nThe data has the following properties:", transforms$var_in %>% paste0(collapse=", "), trait, transforms$value_type %>% paste0(collapse=", "), transforms$unit_in[1], elements$units), - "", - sprintf("- **standardised name**: %s", trait ), - sprintf("- **standardised description**: %s", elements$description ), - sprintf("- **label**: %s", elements$label ), - sprintf("- **units**: %s", units ), - sprintf("- **records**: %s in this study (of %s in AusTraits)", data_trait_study %>% nrow(), data_trait_all %>% nrow() ), - sprintf("- **allowable range**: %s - %s %s", elements$allowed_values_min, - elements$allowed_values_max, units), - sprintf("- **observed range in this study**: %s - %s %s", data_trait_study %>% dplyr::pull(value) %>% min(), - data_trait_study %>% dplyr::pull(value) %>% max(), units), - "", "", - "Data for this trait in this study were collected using the following methods:", "", - sprintf("- **value_type**: %s", transforms$value_type %>% 
paste0(collapse=", ")), - sprintf("- **replicates**: %s", transforms$replicates %>% paste0(collapse=", ")), - sprintf("- **methods**: %s", transforms$methods[1]), - "","") %>% - writeLines() - - writeLines(" ") - - writeLines(ifelse(nrow(filter(data_study$excluded_data, error == "Value out of allowable range" & trait_name == trait)) > 0, - "The following data points were excluded because the values were outside the allowable range (error = `Value out of allowable range`). Our ranges of allowable values are sufficiently generous, that most excluded data are well outside the expected range for the trait, but if you believe these data are valid, accurate measurements that will be useful to future AusTraits users, let us know.","")) - -if(nrow(filter(data_study$excluded_data, error == "Value out of allowable range" & trait_name == trait)) > 0 ) { - data_study$excluded_data %>% filter(error == "Value out of allowable range" & trait_name == trait) %>% - select(observation_id, taxon_name, location_id, value, unit) %>% - my_kable_styling() %>% - writeLines() -} - - austraits::plot_trait_distribution_beeswarm(austraits, trait, "dataset_id", highlight=dataset_id, hide_ids = TRUE) - - writeLines(c("")) - - outliers <- data_trait_all %>% - arrange(value) %>% - mutate(per = seq_len(n())/n()*100) %>% - filter(dataset_id %in% unique(data_trait_study$dataset_id), - per < 2.5 | per > 97.5) - - # check for outliers - if(nrow(outliers) > 0 ) { - writeLines(sprintf("OUTLIERS: %s of points in your dataset lie in either the bottom or top 2.5 percent of data\n", nrow(outliers))) - percent_outliers <- nrow(outliers)/nrow(data_trait_study)*100 - if(percent_outliers > 10) { - new_question(paste0("(section `traits`) More than 10% of your data points for the trait `", trait, "` are outliers, does this seem reasonable, given what you know about the biology of these taxa and overall distribution of values in AusTraits?")) - } else { - new_question( - sprintf("(section `traits`) Do the data for 
the trait `%s` appear correct?", trait) - ) - } - } - - # Check if methods data are complete - methods_missing <- transforms[1,] %in% c("unknown") - names_missing <- names(transforms)[methods_missing] - - for(v in c("value_type", "replicates", "methods")) { - - if(v %in% names_missing ) { - question_x(v, trait) - - if(!(v %in% structures_already_defined)) { - define_variable(v, schema) - structures_already_defined <- c(v, structures_already_defined) - } - } - } - - -} -``` - -## Categorical traits - -```{r categorical, results='asis', echo=FALSE, eval=TRUE} -writeLines( - ifelse(length(traits_categorical) == 0, - "No categorical traits data are currently available in this dataset.", - "We have recorded the following traits with categorical values in this dataset. Tables are shown comparing the distribution of values in your study to others in the dataset")) - -for(trait in traits_categorical) { - - transforms <- metadata$traits %>% util_list_to_df2() %>% filter(trait_name == trait) - if(is.na(metadata$substitutions[1])) { - # an empty table (not the `tibble` function itself) so the later `nrow()` checks see zero rows - substitutions_for_trait <- tibble::tibble() - } else { - substitutions_for_trait <- metadata$substitutions %>% util_list_to_df2() %>% filter(trait_name == trait) - } - - data_trait_study <- data_study$traits %>% - filter(trait_name == trait) - - data_trait_all <- austraits$traits %>% - filter(trait_name == trait) - - definitions <- austraits$definitions - - elements <- definitions[[trait]] - - x <- c("", "", - sprintf("### %s", trait), - "", - sprintf("We aligned the variable called `%s` in the data you supplied with the trait `%s` in the AusTraits database.\n\nThe contributed data has the following properties:", transforms$var_in, trait), - "", - sprintf("- **standardised name**: %s", trait ), - sprintf("- **standardised description**: %s", elements$description ), - sprintf("- **label**: %s", elements$label ), - sprintf("- **records**: %s in this study (of %s in AusTraits)", data_trait_study %>% nrow(), data_trait_all %>% nrow() ), - "", - "Data for this
trait in this study were collected using the following methods:", "", - sprintf("- **value_type**: %s", transforms$value_type), - sprintf("- **replicates**: %s", transforms$replicates), - sprintf("- **methods**: %s", transforms$methods), - "", - "The allowable values for this trait in AusTraits are:", - "", - elements$allowed_values_levels %>% unlist() %>% as.data.frame() %>% tibble::rownames_to_column() %>% rename(trait_value = "rowname", definition = ".") %>% arrange(trait_value) %>% my_kable_styling(), - "", - ifelse(nrow(substitutions_for_trait)>0, - "The following substitutions have been implemented to align categorical trait values in the submitted data table with allowable trait values specified in the AusTraits traits_definitions file.", - "No substitutions were required to align the data for this trait with AusTraits allowed trait values."), - ifelse(nrow(substitutions_for_trait)>0, - substitutions_for_trait %>% rename("value in contributed data table"=find,"AusTraits aligned value"=replace)%>% my_kable_styling(),""), - "The following table shows distribution of recorded values in your study and AusTraits as a whole. 
Note that space-delimited strings of values, indicate when multiple trait values were recorded for a single `observation` (taxon or individual-level) within a single study.", - "") %>% - writeLines() - - right_join( - data_trait_study %>% group_by(value) %>% summarise(study = n()), - data_trait_all %>% group_by(value) %>% summarise(total = n()), - by= "value" - ) %>% - mutate( - study = ifelse(is.na(study), 0, study), - `% study` = round(study/sum(study)*100), - `% total` = round(total/sum(total)*100) - ) %>% - arrange(-`study`,-`total`) %>% - my_kable_styling() %>% - add_header_above(c(" ", "Counts" = 2, "Percent" = 2)) %>% - column_spec(1, border_right = TRUE) %>% - column_spec(3, border_right = TRUE) %>% - column_spec(5, border_right = TRUE) %>% - writeLines() - - writeLines("\n") - new_question( - sprintf("(section `traits`) Do the data for the trait `%s` appear correct?", trait) - ) - - # Check if methods data are complete - methods_missing <- transforms[1,] %in% c("unknown") - names_missing <- names(transforms)[methods_missing] - - for(v in c("value_type", "replicates", "methods")) { - - if(v %in% names_missing ) { - question_x(v, trait) - - if(!(v %in% structures_already_defined)) { - define_variable(v, schema) - structures_already_defined <- c(v, structures_already_defined) - } - } - } - -} -``` - -# Taxon list - -We have records on the following taxa from your study. We have attempted to align taxon names in this study with the Australian Plant Census (APC) or Australian Plant Names Index (APNI). - -## Full taxa list - -The table below shows the table of all taxa in this study, where possible using the name aligned with the APC. 
Columns are as follows: - -```{r, results='asis', echo=FALSE} -schema$austraits$elements$taxa$elements %>% - util_list_to_df1() %>% - my_kable_styling() -``` - -Clicking on the name will take you to the relevant taxonomic record in the APC or APNI, where known: - -```{r taxon_list2, results='asis', echo=FALSE} -data_study$taxa %>% - select(one_of(names(schema$austraits$elements$taxa$elements))) -> tmp - -if(nrow(tmp) < 10000 ){ - tmp %>% - mutate( - taxon_name = as_link(taxon_id, taxon_name)) %>% - mutate_all(replace_na, "") %>% - my_kable_styling() -} else{ - cat(crayon:red("\t\tTable too large to dsiplay\n")) - tmp %>% - mutate_all(replace_na, "") %>% - write_csv(paste0(dataset_id,"_taxa.csv")) -} -``` - - -To create the list of aligned taxa, we needed to make some taxonomic changes. This involved two stages. - -* **Stage 1**: Where possible (i.e. there was no or only a few characters difference), the name you supplied was matched automatically with a known name in APC or APNI. In other cases we may have aligned the taxa by searching for an appropriate match. Such changes are documented in the study metadata file. The variable `aligned_name` shows the updated name. The variable `d1` shows the number of characters difference between the `original_name` and `aligned_name`. -* **Stage 2**: Once aligned with a known name, we used the APC to update the `aligned_name` to an accepted name. The taxonomic status of the cleaned name is indicated in column `status cleaned name`. If accepted, no change was made. If it is a synonym or otherwise, the name was changed according to the recommendation given in the APC. Where they existed, we preferred to take the accepted status of an `aligned_name`, if it existed. Alternative status values are indicated in brackets. This indicate if alternative uses of the name were ever applied. - -Links on `aligned_name` and `taxon_name` take you to the APC or APNI record for that name. 
- -```{r} -data_study$taxonomic_updates %>% select(-dataset_id) %>% - filter(original_name !=taxon_name) %>% - mutate( - d1 = purrr::map2_dbl(original_name, aligned_name, ~adist(.x, .y)) %>% as.character() %>% str_replace("0", ""), - d2 = purrr::map2_dbl(taxon_name, aligned_name, ~adist(.x, .y)) %>% as.character() %>% str_replace("0", ""), - aligned_name = as_link(aligned_scientific_name_id, aligned_name), - taxon_name = as_link(taxon_id, taxon_name), - aligned_name_alternative_taxonomic_status = replace_na(aligned_name_alternative_taxonomic_status, ""), - `status cleaned name` = sprintf("%s %s", aligned_name_taxonomic_status, ifelse(aligned_name_alternative_taxonomic_status=="", "", paste("(", aligned_name_alternative_taxonomic_status, ")"))) - ) %>% - select(original_name, d1, aligned_name, d2, taxon_name, `status cleaned name`) %>% - mutate_all(replace_na, "") %>% - my_kable_styling() -``` - -## Unknown taxa - -We were unable to find taxonomic matches for the following taxa in the data. Where genus and family are shown, the specified genus aligns with an accepted genus. The term `unplaced` appears as the `status` for names on the APNI list, but not yet reviewed by the APC. - -```{r taxa_list3, results='asis', echo=FALSE} -data_study$taxa %>% - filter(is.na(taxon_id)) %>% - select(one_of(names(schema$austraits$elements$taxa$elements))) %>% - select(-taxon_id, -scientific_name_id, -taxon_distribution, -establishment_means) %>% - mutate_all(replace_na, "") %>% - my_kable_styling() -``` - - -```{r, results='asis', echo=FALSE} -new_question("(section `taxonomic_updates`) Do these taxonomic alignments and corrections look reasonable?") -``` - -# Review of questions - -Here is a summary of the questions we would like answers to. Clicking on the links will take you back to the appropriate spot in the document. Please copy and past these into an email when you respond. 
- - - - -```{r, results='asis', echo=FALSE} -questions() %>% traits.build:::notetaker_print_all() %>% writeLines() -``` - - - - - -# Session information - -The technical information below may useful for us for the sake of reproducibility, but you don't need to worry about it! - - -This report was generated using the data from the commit `r util_get_SHA_link()` - -And the following R environment - -```{r, echo=FALSE} -sessionInfo() -``` - - - diff --git a/tests/build/helper.R b/tests/build/helper.R index 16e5551c5..059d57883 100644 --- a/tests/build/helper.R +++ b/tests/build/helper.R @@ -1,10 +1,7 @@ requireNamespace("testthat", quietly = TRUE) - - - library(traits.build) -source("../testthat/functions.R") +source("../functions.R") root.dir <- rprojroot::find_package_root_file() source(file.path(root.dir, "R/custom_R_code.R")) diff --git a/tests/build/test-axrxiv_constancy.R b/tests/build/test-axrxiv_constancy.R index 9ce620e61..cdf01f01a 100644 --- a/tests/build/test-axrxiv_constancy.R +++ b/tests/build/test-axrxiv_constancy.R @@ -10,9 +10,9 @@ build_comparison_set <- function(root.dir, definitions, unit_conversions, schema # Tomlinson_2019 - complete taxonomic changes f_build <- function(x, definitions, unit_conversions, schema, resource_metadata, taxon_list) { - config <- dataset_configure(file.path(root.dir, "data", x, "metadata.yml"), definitions, unit_conversions) - data <- dataset_process(file.path(root.dir, "data", x, "data.csv"), config, schema, resource_metadata) - data <- build_update_taxonomy(data, taxon_list) + config <- dataset_configure(file.path(root.dir, "data", x, "metadata.yml"), definitions) + data <- dataset_process(file.path(root.dir, "data", x, "data.csv"), config, schema, resource_metadata, unit_conversions) + data <- dataset_update_taxonomy(data, taxon_list) data } diff --git a/tests/functions.R b/tests/functions.R new file mode 100644 index 000000000..59b78bf53 --- /dev/null +++ b/tests/functions.R @@ -0,0 +1,126 @@ + +# Better than 
expect_silent as contains `info` and allows for complete failures
+expect_no_error <- function(object, regexp = NULL, ..., info = NULL, label = NULL) {
+  # Forcing the lazy `object` promise inside tryCatch() runs the caller's code
+  # and captures any error it throws; `regexp` and `...` are accepted but unused.
+  # NOTE(review): newer testthat releases export their own expect_no_error();
+  # confirm that masking it with this definition is intended.
+  error <- tryCatch({
+    object
+    NULL
+  }, error = function(e) e)
+  # sprintf() returns character(0) when given a NULL argument, which would drop
+  # the whole failure message, so fall back to a generic label.
+  if (is.null(label)) label <- "code"
+  expect(is.null(error), sprintf("%s threw an error: %s", label, paste(error$message, collapse = ",")), info = info)
+  invisible(NULL)
+}
+
+# Expect every value in `object` (flattened with unlist()) to occur exactly once.
+expect_unique <- function(object, info = NULL, label = NULL) {
+  if (is.null(info)) info <- "object"
+  x <- table(unlist(object))
+  i <- x == 1
+  comp <- compare(all(i), TRUE)
+  expect(comp$equal, sprintf("%s - not unique: %s", info, paste(names(x)[!i], collapse = ", ")))
+  invisible(object)
+}
+
+# Expect every value of `object` to be in `expected`; NAs are dropped first
+# unless `na.rm = FALSE`. `...` is forwarded to compare().
+expect_isin <- function(object, expected, ..., info = NULL, label = NULL,
+                        expected.label = NULL, na.rm = TRUE) {
+  if (is.null(info)) info <- "object"
+  if (na.rm) {
+    object <- object[!is.na(object)]
+  }
+  i <- object %in% expected
+  comp <- compare(all(i), TRUE, ...)
+  expect(
+    comp$equal,
+    sprintf("%s - should not contain: %s", info, paste(object[!i], collapse = ", "))
+  )
+  invisible(object)
+}
+
+# Expect `object` to contain no NA values.
+expect_not_NA <- function(object, info = NULL, label = NULL) {
+  if (is.null(info)) info <- "object"
+  i <- !is.na(object)
+  # Without a label, report where the NAs are instead of an empty message.
+  if (is.null(label)) label <- paste("position(s)", paste(which(!i), collapse = ", "))
+  comp <- compare(all(i), TRUE)
+  expect(comp$equal, sprintf("%s - contains NAs: %s", info, label))
+  invisible(object)
+}
+
+# Expect `data` to be a list. inherits() is used because `class(data) == "list"`
+# yields a vector for multi-class objects, which expect_true() does not accept.
+test_list <- function(data, info) {
+  expect_true(inherits(data, "list"), info = info)
+}
+
+# Expect `data` to be a list whose names are non-NA and unique.
+test_list_names_valid <- function(data, info) {
+  test_list(data, info)
+  expect_not_NA(names(data), info = info)
+# expect_allowed_text(names(data), info = info)
+  expect_unique(names(data), info = info)
+}
+
+# Expect `data` to be a validly named list with exactly `expected_names`.
+test_list_named <- function(data, expected_names, info) {
+  test_list_names_valid(data, info)
+  expect_named(data, expected_names, info = info)
+}
+
+# Expect `data` to be a validly named list using only names in `expected_names`.
+test_list_named_contains <- function(data, expected_names, info) {
+  test_list_names_valid(data, info)
+  expect_isin(names(data), expected_names, info = info)
+}
+
+# Expect `data` to be a data frame with non-NA, unique column names.
+test_dataframe_valid <- function(data, info) {
+  expect_not_NA(colnames(data), info = info)
+# expect_allowed_text(colnames(data), info = info)
+  expect_unique(colnames(data), info = info)
+  expect_true(is.data.frame(data), info = info)
+}
+
+# Expect `data` to be a valid data frame with exactly `expected_colnames`.
+test_dataframe_named <- function(data, expected_colnames, info) {
+  test_dataframe_valid(data, info)
+  expect_named(data, expected_colnames, info = info)
+}
+
+# Build one dataset (configure, process, update taxonomy), failing the test if a
+# step errors, then check its structure. Call signatures mirror the updated
+# f_build() in tests/build/test-axrxiv_constancy.R.
+test_build_dataset <- function(path_metadata, path_data, info, definitions, unit_conversions, schema, resource_metadata, taxon_list) {
+  expect_no_error({
+    build_config <- dataset_configure(path_metadata, definitions)
+  }, info = paste(info, " config"))
+  expect_no_error({
+    build_dataset_raw <- dataset_process(path_data, build_config, schema, resource_metadata, unit_conversions)
+  }, info = paste(info, " dataset_process"))
+  expect_no_error({
+    build_dataset <- dataset_update_taxonomy(build_dataset_raw, taxon_list)
+  }, info = paste(info, " update taxonomy"))
+  test_structure(build_dataset, info, schema, definitions, single_dataset = TRUE)
+  build_dataset
+}
+
+# Check `data` against `schema`: top-level element names, the column names of
+# each table, and that every trait name is defined in `definitions`.
+# `single_dataset` is accepted but unused.
+test_structure <- function(data, info, schema, definitions, single_dataset = TRUE) {
+  vars_austraits <- names(schema$austraits$elements)
+  non_tables <- c("dataset_id", "definitions", "schema", "sources", "metadata", "build_info")
+  vars_tables <- vars_austraits[!(vars_austraits %in% non_tables)]
+  test_list_named(data, vars_austraits, info = paste(info, " - main elements"))
+  for (v in vars_tables) {
+    test_dataframe_named(data[[v]], names(schema$austraits$elements[[v]]$elements), info = paste(info, " - structure of ", v))
+  }
+  expect_isin(unique(data$traits$trait_name), names(definitions$elements), info = paste(info, " - trait names"))
+}
diff --git a/tests/testthat.R b/tests/testthat.R
deleted file mode 100644
index 4e5b2a012..000000000
--- a/tests/testthat.R
+++ /dev/null
@@ -1,4 +0,0 @@
-library(testthat)
-library(traits.build)
-
-testthat::test_check("traits.build")
diff --git a/tests/testthat/.gitignore b/tests/testthat/.gitignore
deleted file mode 100644
index a90197507..000000000
--- a/tests/testthat/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-remake.yml
-.git