From 42bc97f9b3b9efcdf74d2ac8cab9e9e46e0c5e51 Mon Sep 17 00:00:00 2001
From: "Dr. Erin M. Buchanan"
Date: Sat, 1 Aug 2020 12:38:11 -0400
Subject: [PATCH] building package

---
 .Rbuildignore        |   2 +
 DESCRIPTION          |  11 ++++
 NAMESPACE            |   1 +
 R/get_data.R         | 117 +++++++++++++++++++++++++++++++++++++++++++
 R/load_metadata.R    |  28 +++++++++++
 man/hello.Rd         |  12 +++++
 semanticprimeR.Rproj |  20 ++++++++
 7 files changed, 191 insertions(+)
 create mode 100644 .Rbuildignore
 create mode 100644 DESCRIPTION
 create mode 100644 NAMESPACE
 create mode 100644 R/get_data.R
 create mode 100644 R/load_metadata.R
 create mode 100644 man/hello.Rd
 create mode 100644 semanticprimeR.Rproj

diff --git a/.Rbuildignore b/.Rbuildignore
new file mode 100644
index 0000000..91114bf
--- /dev/null
+++ b/.Rbuildignore
@@ -0,0 +1,2 @@
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..f0b9859
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,11 @@
+Package: semanticprimeR
+Type: Package
+Title: What the Package Does (Title Case)
+Version: 0.1.0
+Author: Who wrote it
+Maintainer: The package maintainer
+Description: More about what it does (maybe more than one line)
+    Use four spaces when indenting paragraphs within the Description.
+License: What license is it under?
+Encoding: UTF-8
+LazyData: true
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..d75f824
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1 @@
+exportPattern("^[[:alpha:]]+")
diff --git a/R/get_data.R b/R/get_data.R
new file mode 100644
index 0000000..cb606cd
--- /dev/null
+++ b/R/get_data.R
@@ -0,0 +1,117 @@
+#' Get Dataset
+#'
+#' This function allows you to import the current datasets available from
+#' \href{https://github.com/orgs/SemanticPriming/}{Semantic Priming GitHub Group}.
+#'
+#' @param corpus Include a two letter code to download the Open Subtitles corpus for
+#' text models. You can view the corpora on
+#' \href{http://opus.nlpl.eu/OpenSubtitles.php}{their website}. Note: these files
+#' can be very large, so they may take up a lot of memory to download. They are
+#' text based files that are read using `readLines`.
+#' @param bibtexID The bibtex ID of the dataset you are trying to load.
+#' You can leave all parameters blank to load just the metadata.
+#' @param citation Set to TRUE to include the citation for the dataset you
+#' loaded - will only load if you include a bibtex ID.
+#' @param language If you include a bibtex ID, you will get back the language of
+#' the dataset, if you do not include a bibtex ID, it will return a list of
+#' datasets in that language.
+#' @param variables If you include a bibtex ID, you will get back the variables
+#' included the dataset, if you do not include a bibtex ID, it will return a list of
+#' datasets that include that variable (can also be paired with language).
+#' Use the column names from the metadata as your filter.
+#' @return
+#' \item{metadata}{The metadata list of available datasets}
+#' \item{loaded_data}{The dataset you requested to load}
+#' \item{language}{The language of the dataset you requested to load}
+#' \item{variables}{The variables of the dataset you requested to load}
+#' \item{datasets}{Possible datasets based on your language and variable names}
+#'
+#' @keywords metadata datasets linguistic norms
+#' @export
+#' @examples
+#'
+#' get_dataset()
+#' get_dataset(bibtexID = "Birchenough2017", citation = TRUE)
+#' get_dataset(language = "English", variables = c("aoa", "freq"))
+
+get_dataset <- function(corpus = NULL,
+                        bibtexID = NULL,
+                        citation = NULL,
+                        language = NULL,
+                        variables = NULL) {
+
+  metadata <- load_metadata()
+  variable_return <- list(metadata = metadata)
+
+  # Download and read a (possibly large) gzipped subtitle corpus.
+  if (!is.null(corpus)) {
+    con <- gzcon(url(paste0("http://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/mono/OpenSubtitles.",
+                            corpus, ".gz")))
+    # Close the connection even on error so it does not leak.
+    on.exit(close(con), add = TRUE)
+    variable_return$subtitle <- readLines(con, encoding = "UTF-8")
+  }
+
+  if (!is.null(bibtexID)) {
+
+    data_link <- metadata$link[metadata$bibtex == bibtexID]
+    variable_return$loaded_data <- read.csv(url(data_link), stringsAsFactors = FALSE)
+
+    # Only build the citation when explicitly requested (citation = TRUE);
+    # a bare !is.null() check would also fire for citation = FALSE.
+    if (isTRUE(citation)) {
+      variable_return$citation <- paste0(metadata$author[metadata$bibtex == bibtexID], ". (",
+                                         metadata$year[metadata$bibtex == bibtexID], "). ",
+                                         metadata$ref_title[metadata$bibtex == bibtexID], ". ",
+                                         metadata$ref_journal[metadata$bibtex == bibtexID], ", ",
+                                         metadata$ref_volume[metadata$bibtex == bibtexID], ", ",
+                                         metadata$ref_page[metadata$bibtex == bibtexID], ". doi: ",
+                                         metadata$ref_doi[metadata$bibtex == bibtexID])
+    }
+
+    if (!is.null(language)) {
+      variable_return$language <- metadata$language[metadata$bibtex == bibtexID]
+    }
+
+    if (!is.null(variables)) {
+      # Columns 26 onward hold the 0/1 variable indicator flags -- TODO confirm
+      temp <- metadata[metadata$bibtex == bibtexID, 26:ncol(metadata)]
+      variable_return$variables <- colnames(temp)[temp == 1]
+    }
+
+  } else {
+
+    if (!is.null(language) && !is.null(variables)) { # both filters
+
+      temp <- metadata[tolower(metadata$language) == tolower(language), ]
+
+      for (var in variables) {
+        if (var %in% colnames(metadata)) {
+          temp <- temp[temp[, var] == 1, ]
+        }
+      }
+
+      variable_return$datasets <- temp
+
+    } else if (!is.null(language)) { # just language
+
+      variable_return$datasets <- metadata[tolower(metadata$language) == tolower(language), ]
+
+    } else if (!is.null(variables)) { # just variables
+
+      temp <- metadata
+
+      for (var in variables) {
+        if (var %in% colnames(metadata)) {
+          temp <- temp[temp[, var] == 1, ]
+        }
+      }
+
+      variable_return$datasets <- temp
+
+    }
+
+  }
+
+  return(variable_return)
+}
diff --git a/R/load_metadata.R b/R/load_metadata.R
new file mode 100644
index 0000000..b4ba566
--- /dev/null
+++ b/R/load_metadata.R
@@ -0,0 +1,28 @@
+#' Load Metadata
+#'
+#' This function loads the current metadata available from the
+#' \href{https://github.com/orgs/SemanticPriming/}{Semantic Priming GitHub Group}.
+#'
+#' @param webaddress The default value for webaddress is the current location
+#' of the metadata list.
+#' @return
+#' \item{metadata}{The metadata list of available datasets}
+#'
+#' @keywords metadata datasets linguistic norms
+#' @export
+#' @examples
+#'
+#' # Use the following to load the metadata:
+#' metadata <- load_metadata()
+#' head(metadata)
+
+load_metadata <- function(webaddress = "https://raw.githubusercontent.com/SemanticPriming/LAB-data/master/included_data.csv") {
+
+  metadata <- read.csv(url(webaddress), stringsAsFactors = FALSE)
+
+  # Keep only the datasets marked for inclusion in the metadata list.
+  metadata <- subset(metadata, included == "yes")
+
+  return(metadata)
+}
+
diff --git a/man/hello.Rd b/man/hello.Rd
new file mode 100644
index 0000000..0fa7c4b
--- /dev/null
+++ b/man/hello.Rd
@@ -0,0 +1,12 @@
+\name{hello}
+\alias{hello}
+\title{Hello, World!}
+\usage{
+hello()
+}
+\description{
+Prints 'Hello, world!'.
+}
+\examples{
+hello()
+}
diff --git a/semanticprimeR.Rproj b/semanticprimeR.Rproj
new file mode 100644
index 0000000..497f8bf
--- /dev/null
+++ b/semanticprimeR.Rproj
@@ -0,0 +1,20 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source