Merge pull request #10 from jrdnbradford/doc-updates

Update docs
jrdnbradford · Jul 2, 2024 · 43df12e · 43df12e
2 parents 2430d90 + 4d24ca0
commit 43df12e
Show file tree

Hide file tree

Showing 71 changed files with 477 additions and 727 deletions.
diff --git a/R/data.R b/R/data.R
diff --git a/README.md b/README.md
@@ -6,22 +6,46 @@
 
 -H. P. Lovecraft, *The Call of Cthulhu*
 
-## H. P. Lovecraft's Works
+## H. P. Lovecraft Datasets
 
-This package contains H. P. Lovecraft's corpus in both [raw txt form](/data-raw/corpus/) and [R's native RDA format](/data/) for text processing.
+This package contains H. P. Lovecraft's corpus as R datasets for textual analysis.
+
+## Usage
 
 Character vectors of individual works can be accessed in several ways:
 ```R
 cthulhu <- lovecraftr::the_call_of_cthulhu
+head(cthulhu)
+
+#> [1] "THE CALL OF CTHULHU"
+#> [2] ""
+#> [3] "Of such great powers or beings there may be conceivably a survival...a survival"
+#> [4] "of a hugely remote period when...consciousness was manifested, perhaps, in"
+#> [5] "shapes and forms long since withdrawn before the tide of advancing"
+#> [6] "humanity...forms of which poetry and legend alone have caught a flying memory"
 ```
 or
 ```R
 data("the_call_of_cthulhu")
 ```
 
-The entire corpus can be accessed as a `data.frame`/`tbl_df` with:
+The entire corpus can be accessed as a tibble with:
 ```R
-corpus <- lovecraftr::lovecraft()
+corpus <- lovecraftr::lovecraft_corpus()
+
+corpus |>
+  dplyr::filter(title == "The Call Of Cthulhu") |>
+  head()
+
+#> # A tibble: 6 × 2
+#>   title               text
+#>   <chr>               <chr>
+#> 1 The Call Of Cthulhu "THE CALL OF CTHULHU"
+#> 2 The Call Of Cthulhu ""
+#> 3 The Call Of Cthulhu "Of such great powers or beings there may be conceivably …
+#> 4 The Call Of Cthulhu "of a hugely remote period when...consciousness was manif…
+#> 5 The Call Of Cthulhu "shapes and forms long since withdrawn before the tide of…
+#> 6 The Call Of Cthulhu "humanity...forms of which poetry and legend alone have c…
 ```
 or
 ```R

diff --git a/data-raw/generate_data.R b/data-raw/generate_data.R
@@ -1,79 +1,74 @@
-#!/usr/bin/env Rscript 
+#!/usr/bin/env Rscript
 
 # This script generates the RDA datasets and their
 # accompanying documentation for {lovecraftr} using
 # the raw txt files in data-raw/corpus
 
 
 raw_txt_path <- file.path("data-raw", "corpus")
+corpus <- list.files(raw_txt_path)
+
 data_docs_file <- file.path("R", "data.R")
+file.remove(data_docs_file)
 
 header <- "# This documentation is generated by a script, do not edit by hand
 # To update, edit the raw txt in data-raw/corpus and run data-raw/generate_data.R
 ##################################################################################
 "
 
-generate_data <- function() {
-  corpus <- list.files(
-    path = raw_txt_path,
-    full.names = FALSE,
-    recursive = FALSE
-  )
-
-  file.remove(data_docs_file)
-
-  docs_template <- "{header}
-#' @title \"{title}\"
+docs_template <- "{header}
+#' @title Dataset of \"{title}\"
 #'
-#' @description A dataset containing UTF-8 plain text of
-#'   H. P. Lovecraft's \"{title}\"
+#' @description A dataset containing text of H. P. Lovecraft's \"{title}\"
 #'
 #' @examples
 #' data(\"{dataset_name}\")
 #' lovecraftr::{txt}
 #'
-#' @format A character vector with {scales::number(length(text), big.mark = \",\")} elements,
-#'   with roughly 80 characters per line
+#' @format A character vector with {num_vectors} elements, with roughly 80 characters per line
 #'
 #' @source Public domain
 \"{dataset_name}\"
 
 ##################################################################################
 "
-  docs <- c()
-  dataframes_list <- list()
-  for (txt in corpus) {
-    txt_path <- file.path(raw_txt_path, txt)
-    text <- readr::read_lines(txt_path)
-    dataset_name <- as.name(txt)
-    assign(txt, text)
-    do.call(usethis::use_data, list(dataset_name, overwrite = TRUE))
+docs <- c()
+dataset_list <- list()
+for (txt in corpus) {
+  txt_path <- file.path(raw_txt_path, txt)
+  text <- readr::read_lines(txt_path)
+  num_vectors <- scales::number(length(text), big.mark = ",")
+  dataset_name <- as.name(txt)
+  assign(txt, text)
+  do.call(usethis::use_data, list(dataset_name, overwrite = TRUE))
 
-    title <- text |>
-      head(1) |>
-      stringr::str_to_title()
+  title <- text |>
+    head(1) |>
+    stringr::str_to_title()
 
-    dataset_docs <- glue::glue(docs_template)
-    docs <- c(docs, dataset_docs)
+  dataset_docs <- glue::glue(docs_template)
+  docs <- c(docs, dataset_docs)
 
-    dataframes_list[[dataset_name]] <- tibble::tibble(
-      title = title,
-      text = text
-    )
-  }
+  dataset_list[[dataset_name]] <- tibble::tibble(
+    title = title,
+    text = text
+  )
+}
 
-  lovecraft <- dplyr::bind_rows(dataframes_list)
-  corpus_docs <- glue::glue("{header}
-#' @title Lovecraft
+lovecraft <- dplyr::bind_rows(dataset_list)
+num_rows <- scales::number(nrow(lovecraft), big.mark = ",")
+num_cols <- ncol(lovecraft)
+corpus_docs <- glue::glue("{header}
+#' @title Dataset of Lovecraft's corpus
 #'
-#' @description A dataset containing UTF-8 plain text of
-#'   all the H. P. Lovecraft works available in {{lovecraftr}}
+#' @description A dataset containing text of all the H. P. Lovecraft works available in {{lovecraftr}}
 #'
 #' @examples
 #' data(\"lovecraft\")
 #' lovecraftr::lovecraft
+#' df <- lovecraftr::lovecraft_corpus()
 #'
-#' @format A tibble with {scales::number(nrow(lovecraft), big.mark = \",\")} rows and {ncol(lovecraft)} columns:
+#' @format A tibble with {num_rows} rows and {num_cols} columns:
 #' \\describe{{
 #'   \\item{{title}}{{Title of the work}}
 #'   \\item{{text}}{{~80 characters of text from the title}}
@@ -83,12 +78,11 @@ generate_data <- function() {
 \"lovecraft\"
 ")
 
-  docs <- c(docs, corpus_docs)
-  usethis::use_data(lovecraft, overwrite = TRUE)
+docs <- c(docs, corpus_docs)
+usethis::use_data(lovecraft, overwrite = TRUE)
 
-  doc_file <- file(data_docs_file, "a")
-  write(docs, file = doc_file)
-  close(doc_file)
-}
+doc_file <- file(data_docs_file, "a")
+write(docs, file = doc_file)
+close(doc_file)
 
-generate_data()
+roxygen2::roxygenize()
diff --git a/man/at_the_mountains_of_madness.Rd b/man/at_the_mountains_of_madness.Rd
diff --git a/man/azathoth.Rd b/man/azathoth.Rd
diff --git a/man/beyond_the_wall_of_sleep.Rd b/man/beyond_the_wall_of_sleep.Rd
diff --git a/man/celephais.Rd b/man/celephais.Rd
diff --git a/man/cool_air.Rd b/man/cool_air.Rd
diff --git a/man/dagon.Rd b/man/dagon.Rd
diff --git a/man/dreams_in_the_witch_house.Rd b/man/dreams_in_the_witch_house.Rd
diff --git a/man/ex_oblivione.Rd b/man/ex_oblivione.Rd
diff --git a/man/facts_concerning_the_late_arthur_jermyn_and_his_family.Rd b/man/facts_concerning_the_late_arthur_jermyn_and_his_family.Rd