Skip to content

Commit

Permalink
building package
Browse files Browse the repository at this point in the history
  • Loading branch information
doomlab committed Aug 1, 2020
1 parent 0bf2672 commit 42bc97f
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
^.*\.Rproj$
^\.Rproj\.user$
11 changes: 11 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Package: semanticprimeR
Type: Package
Title: What the Package Does (Title Case)
Version: 0.1.0
Author: Who wrote it
Maintainer: The package maintainer <[email protected]>
Description: More about what it does (maybe more than one line)
Use four spaces when indenting paragraphs within the Description.
License: What license is it under?
Encoding: UTF-8
LazyData: true
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
exportPattern("^[[:alpha:]]+")
117 changes: 117 additions & 0 deletions R/get_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#' Get Dataset
#'
#' This function allows you to import the current datasets available from the
#' \href{https://github.com/orgs/SemanticPriming/}{Semantic Priming GitHub Group}.
#' The metadata table is always downloaded (via \code{load_metadata()}) and
#' returned; every other element of the result depends on the arguments.
#'
#' @param corpus Include a two letter code to download the Open Subtitles corpus for
#' text models. You can view the corpora on
#' \href{http://opus.nlpl.eu/OpenSubtitles.php}{their website}. Note: these files
#' can be very large, so they may take up a lot of memory to download. They are
#' text based files that are read using \code{readLines}.
#' @param bibtexID The bibtex ID of the dataset you are trying to load.
#' You can leave all parameters blank to load just the metadata.
#' @param citation Include the citation for the dataset you loaded - will only
#' load if you include a bibtex ID. Any value other than \code{NULL} or
#' \code{FALSE} requests the citation.
#' @param language If you include a bibtex ID, you will get back the language of
#' the dataset, if you do not include a bibtex ID, it will return a list of
#' datasets in that language (matched case-insensitively).
#' @param variables If you include a bibtex ID, you will get back the variables
#' included in the dataset, if you do not include a bibtex ID, it will return a list of
#' datasets that include that variable (can also be paired with language).
#' Use the column names from the metadata as your filter. Names that are not
#' metadata columns are ignored with a warning.
#' @return A named list with the following elements (only \code{metadata} is
#' always present):
#' \item{metadata}{The metadata list of available datasets}
#' \item{subtitle}{The lines of the requested Open Subtitles corpus}
#' \item{loaded_data}{The dataset you requested to load}
#' \item{citation}{The citation string of the dataset you requested to load}
#' \item{language}{The language of the dataset you requested to load}
#' \item{variables}{The variables of the dataset you requested to load}
#' \item{datasets}{Possible datasets based on your language and variable names}
#'
#' @keywords metadata, datasets, linguistic norms
#' @export
#' @examples
#' \dontrun{
#' # These examples download data from the internet.
#' get_dataset()
#' get_dataset(bibtexID = "Birchenough2017", citation = TRUE)
#' get_dataset(language = "English", variables = c("aoa", "freq"))
#' }
get_dataset <- function(corpus = NULL,
                        bibtexID = NULL,
                        citation = NULL,
                        language = NULL,
                        variables = NULL) {

  metadata <- load_metadata()
  variable_return <- list(metadata = metadata)

  if (!is.null(corpus)) {
    stopifnot(is.character(corpus), length(corpus) == 1L)

    con <- gzcon(url(paste0(
      "http://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/mono/OpenSubtitles.",
      corpus, ".gz"
    )))
    # readLines() only recognises "UTF-8" (not "utf8") as an encoding mark.
    # The connection is closed even if the download fails part-way.
    variable_return$subtitle <- tryCatch(
      readLines(con, encoding = "UTF-8"),
      finally = close(con)
    )
  }

  if (!is.null(bibtexID)) {
    stopifnot(is.character(bibtexID), length(bibtexID) == 1L)

    # which() drops NA comparisons, so rows with a missing bibtex never match.
    matches <- which(metadata$bibtex == bibtexID)
    if (length(matches) == 0L) {
      stop("No dataset with bibtexID '", bibtexID,
           "' was found in the metadata.", call. = FALSE)
    }
    entry <- metadata[matches[1L], , drop = FALSE]

    variable_return$loaded_data <- utils::read.csv(url(entry$link),
                                                   stringsAsFactors = FALSE)

    # An explicit citation = FALSE must not produce a citation.
    if (!is.null(citation) && !identical(citation, FALSE)) {
      variable_return$citation <- paste0(
        entry$author, ". (",
        entry$year, "). ",
        entry$ref_title, ". ",
        entry$ref_journal, ", ",
        entry$ref_volume, ", ",
        entry$ref_page, ". doi: ",
        entry$ref_doi
      )
    }

    if (!is.null(language)) {
      variable_return$language <- entry$language
    }

    if (!is.null(variables)) {
      # NOTE(review): column 26 onward is assumed to hold the 0/1 variable
      # flags of the metadata sheet - confirm against the current layout.
      flags <- entry[, 26:ncol(metadata), drop = FALSE]
      variable_return$variables <- colnames(flags)[which(flags == 1)]
    }

  } else if (!is.null(language) || !is.null(variables)) {

    datasets <- metadata

    if (!is.null(language)) {
      # %in% never yields NA, so datasets without a language are excluded
      # instead of turning into all-NA rows.
      keep <- tolower(datasets$language) %in% tolower(language)
      datasets <- datasets[keep, , drop = FALSE]
    }

    if (!is.null(variables)) {
      unknown <- setdiff(variables, colnames(metadata))
      if (length(unknown) > 0L) {
        warning("Ignoring variables that are not metadata columns: ",
                paste(unknown, collapse = ", "), call. = FALSE)
      }

      for (var in intersect(variables, colnames(metadata))) {
        datasets <- datasets[which(datasets[[var]] == 1), , drop = FALSE]
      }
    }

    variable_return$datasets <- datasets
  }

  variable_return
}

#' @rdname get_data
28 changes: 28 additions & 0 deletions R/load_metadata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#' Load Metadata
#'
#' This function loads the current metadata available from the
#' \href{https://github.com/orgs/SemanticPriming/}{Semantic Priming GitHub Group}.
#'
#' @param webaddress The default value for webaddress is the current location
#' of the metadata list. It is read as a CSV file through a URL connection.
#' @return A data frame with the metadata of the available datasets: the rows
#' of the CSV file whose \code{included} column equals \code{"yes"}.
#'
#' @keywords metadata, datasets, linguistic norms
#' @export
#' @examples
#' \dontrun{
#' # Use the following to load the metadata (requires internet access):
#' metadata <- load_metadata()
#' View(metadata)
#' }
load_metadata <- function(webaddress = "https://raw.githubusercontent.com/SemanticPriming/LAB-data/master/included_data.csv") {

  metadata <- utils::read.csv(url(webaddress), stringsAsFactors = FALSE)

  if (!"included" %in% colnames(metadata)) {
    stop("The metadata file has no 'included' column: ", webaddress,
         call. = FALSE)
  }

  # Standard evaluation instead of subset(): which() drops rows where
  # `included` is NA, matching what subset() did.
  metadata[which(metadata$included == "yes"), , drop = FALSE]
}

#' @rdname load_metadata
12 changes: 12 additions & 0 deletions man/hello.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
\name{hello}
\alias{hello}
\title{Hello, World!}
\usage{
hello()
}
\description{
Prints 'Hello, world!'.
}
\examples{
hello()
}
20 changes: 20 additions & 0 deletions semanticprimeR.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source

0 comments on commit 42bc97f

Please sign in to comment.