-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
191 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
^.*\.Rproj$ | ||
^\.Rproj\.user$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
Package: semanticprimeR | ||
Type: Package | ||
Title: Access Semantic Priming Datasets and Metadata | ||
Version: 0.1.0 | ||
Author: Who wrote it | ||
Maintainer: The package maintainer <[email protected]> | ||
Description: Loads the metadata list and the datasets made available by the | ||
    Semantic Priming GitHub group, and optionally downloads OpenSubtitles | ||
    corpora for use in text models. | ||
License: What license is it under? | ||
Encoding: UTF-8 | ||
LazyData: true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
exportPattern("^[[:alpha:]]+") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
#' Get Dataset
#'
#' Import the datasets currently available from the
#' \href{https://github.com/orgs/SemanticPriming/}{Semantic Priming GitHub Group}.
#' The metadata list is always downloaded with `load_metadata()` and returned;
#' the remaining arguments control what else is added to the result.
#'
#' @param corpus Include a two letter code to download the Open Subtitles corpus for
#' text models. You can view the corpora on
#' \href{http://opus.nlpl.eu/OpenSubtitles.php}{their website}. Note: these files
#' can be very large, so they may take up a lot of memory to download. They are
#' text based files that are read using `readLines`.
#' @param bibtexID The bibtex ID of the dataset you are trying to load.
#' You can leave all parameters blank to load just the metadata. The ID must
#' match exactly one entry of the metadata, otherwise an error is raised.
#' @param citation Include the citation for the dataset you loaded - will only
#' load if you include a bibtex ID. Any value other than `NULL` or `FALSE`
#' requests the citation.
#' @param language If you include a bibtex ID, you will get back the language of
#' the dataset, if you do not include a bibtex ID, it will return a list of
#' datasets in that language (matched case-insensitively).
#' @param variables If you include a bibtex ID, you will get back the variables
#' included in the dataset, if you do not include a bibtex ID, it will return a
#' list of datasets that include that variable (can also be paired with language).
#' Use the column names from the metadata as your filter; names that are not
#' metadata columns are skipped with a warning.
#' @return A named list containing `metadata` and, depending on the arguments,
#' some of the following elements:
#' \item{metadata}{The metadata list of available datasets}
#' \item{subtitle}{The lines of the Open Subtitles corpus you requested}
#' \item{loaded_data}{The dataset you requested to load}
#' \item{citation}{The citation of the dataset you requested to load}
#' \item{language}{The language of the dataset you requested to load}
#' \item{variables}{The variables of the dataset you requested to load}
#' \item{datasets}{Possible datasets based on your language and variable names}
#'
#' @keywords metadata, datasets, linguistic norms
#' @export
#' @examples
#'
#' \dontrun{
#' # These examples download data and therefore need an internet connection.
#' get_dataset()
#' get_dataset(bibtexID = "Birchenough2017", citation = TRUE)
#' get_dataset(language = "English", variables = c("aoa", "freq"))
#' }

get_dataset <- function(corpus = NULL,
                        bibtexID = NULL,
                        citation = NULL,
                        language = NULL,
                        variables = NULL) {

  metadata <- load_metadata()
  variable_return <- list(metadata = metadata)

  if (!is.null(corpus)) {
    con <- gzcon(url(paste0(
      "http://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/mono/OpenSubtitles.",
      corpus, ".gz"
    )))
    # readLines() only opens the connection for the duration of the call and
    # does not destroy it afterwards, so release it explicitly on exit.
    on.exit(close(con), add = TRUE)
    # "UTF-8" is the spelling readLines() recognises for marking the strings.
    variable_return$subtitle <- readLines(con, encoding = "UTF-8")
  }

  if (!is.null(bibtexID)) {

    # Resolve the metadata row once instead of re-filtering for every field.
    entry <- metadata[metadata$bibtex %in% bibtexID, , drop = FALSE]
    if (nrow(entry) != 1L) {
      stop(
        "`bibtexID` must match exactly one dataset in the metadata; ",
        "it matched ", nrow(entry), ".",
        call. = FALSE
      )
    }

    variable_return$loaded_data <- utils::read.csv(
      url(entry$link),
      stringsAsFactors = FALSE
    )

    # An explicit citation = FALSE means "no citation", same as the default.
    if (!is.null(citation) && !identical(citation, FALSE)) {
      variable_return$citation <- paste0(
        entry$author, ". (",
        entry$year, "). ",
        entry$ref_title, ". ",
        entry$ref_journal, ", ",
        entry$ref_volume, ", ",
        entry$ref_page, ". doi: ",
        entry$ref_doi
      )
    }

    if (!is.null(language)) {
      variable_return$language <- entry$language
    }

    if (!is.null(variables)) {
      # NOTE(review): the variable indicator columns are taken to start at
      # column 26 of the metadata, as in the original code - confirm against
      # the current layout of the metadata file.
      flag_cols <- setdiff(seq_len(ncol(metadata)), seq_len(25L))
      is_set <- vapply(
        entry[flag_cols],
        function(flag) isTRUE(flag == 1),
        logical(1)
      )
      variable_return$variables <- colnames(metadata)[flag_cols][is_set]
    }

  } else if (!is.null(language) || !is.null(variables)) {

    # No dataset requested: list the datasets matching the given filters.
    matches <- metadata

    if (!is.null(language)) {
      # %in% never yields NA, so entries with a missing language are dropped
      # instead of turning into all-NA rows.
      keep <- tolower(matches$language) %in% tolower(language)
      matches <- matches[keep, , drop = FALSE]
    }

    unknown <- setdiff(variables, colnames(metadata))
    if (length(unknown) > 0) {
      warning(
        "Ignoring variables that are not metadata columns: ",
        paste(unknown, collapse = ", "),
        call. = FALSE
      )
    }

    for (var in intersect(variables, colnames(metadata))) {
      # which() discards NA comparisons rather than producing NA rows.
      matches <- matches[which(matches[[var]] == 1), , drop = FALSE]
    }

    variable_return$datasets <- matches
  }

  variable_return
}
|
||
#' @rdname get_dataset |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#' Load Metadata
#'
#' This function loads the current metadata available from the
#' \href{https://github.com/orgs/SemanticPriming/}{Semantic Priming GitHub Group}.
#' Only the entries whose `included` column equals `"yes"` are returned.
#'
#' @param webaddress The default value for webaddress is the current location
#' of the metadata list. It is read as a CSV file through `url()`.
#' @return
#' \item{metadata}{The metadata list of available datasets, as a data frame}
#'
#' @keywords metadata, datasets, linguistic norms
#' @export
#' @examples
#'
#' \dontrun{
#' # Use the following to load the metadata (needs an internet connection):
#' metadata <- load_metadata()
#' View(metadata)
#' }

load_metadata <- function(webaddress = "https://raw.githubusercontent.com/SemanticPriming/LAB-data/master/included_data.csv") {

  metadata <- utils::read.csv(url(webaddress), stringsAsFactors = FALSE)

  # Fail loudly instead of letting a lookup of `included` fall through to
  # whatever object of that name happens to exist outside the data frame.
  if (!"included" %in% colnames(metadata)) {
    stop(
      "The metadata file at '", webaddress, "' has no 'included' column.",
      call. = FALSE
    )
  }

  # %in% treats a missing `included` value as "not included", which is what
  # subset() did, without relying on non-standard evaluation.
  metadata[metadata$included %in% "yes", , drop = FALSE]
}
|
||
#' @rdname load_metadata |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
\name{hello} | ||
\alias{hello} | ||
\title{Hello, World!} | ||
\usage{ | ||
hello() | ||
} | ||
\description{ | ||
Prints 'Hello, world!'. | ||
} | ||
\examples{ | ||
hello() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
Version: 1.0 | ||
|
||
RestoreWorkspace: Default | ||
SaveWorkspace: Default | ||
AlwaysSaveHistory: Default | ||
|
||
EnableCodeIndexing: Yes | ||
UseSpacesForTab: Yes | ||
NumSpacesForTab: 2 | ||
Encoding: UTF-8 | ||
|
||
RnwWeave: Sweave | ||
LaTeX: pdfLaTeX | ||
|
||
AutoAppendNewline: Yes | ||
StripTrailingWhitespace: Yes | ||
|
||
BuildType: Package | ||
PackageUseDevtools: Yes | ||
PackageInstallArgs: --no-multiarch --with-keep.source |