From 90e45b178e250cb3a5a3036cb68132601288f0ee Mon Sep 17 00:00:00 2001 From: chainsawriot Date: Sun, 26 Nov 2023 14:53:21 +0100 Subject: [PATCH] Create own class #2 (#5) --- NAMESPACE | 3 +++ R/tokenvars.R | 25 ++++++++++++++++++++--- README.Rmd | 4 +++- README.md | 55 ++++++++++++++++++++++++++++----------------------- 4 files changed, 58 insertions(+), 29 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 6e18f9e..483d6ec 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,8 @@ # Generated by roxygen2: do not edit by hand +S3method(as.tokens,tokens_with_tokenvars) +S3method(print,tokens_with_tokenvars) export("tokenvars<-") export(tokens_add_tokenvars) export(tokenvars) +importFrom(quanteda,as.tokens) diff --git a/R/tokenvars.R b/R/tokenvars.R index 1b1347e..97fe9d8 100644 --- a/R/tokenvars.R +++ b/R/tokenvars.R @@ -44,10 +44,30 @@ tokens_add_tokenvars <- function(x) { unclassed_x <- unclass(x) unclassed_x <- add_tokenid(unclassed_x) attr(unclassed_x, "tokenvars") <- make_tokenvars(unclassed_x) - class(unclassed_x) <- c("tokens") + class(unclassed_x) <- c("tokens_with_tokenvars") return(unclassed_x) } +#' @importFrom quanteda as.tokens +#' @method as.tokens tokens_with_tokenvars +#' @export +as.tokens.tokens_with_tokenvars <- function(x, remove_tokenvars = TRUE, ...) { + if (remove_tokenvars) { + attr(x, "tokenvars") <- NULL + } + class(x) <- "tokens" + return(x) +} + +#' @export +print.tokens_with_tokenvars <- function(x, max_ndoc = quanteda::quanteda_options("print_tokens_max_ndoc"), + max_ntoken = quanteda::quanteda_options("print_tokens_max_ntoken"), + show_summary = quanteda::quanteda_options("print_tokens_summary"), ...) { + ## TODO + print(as.tokens(x, remove_tokenvars = FALSE), max_ndoc = max_ndoc, max_ntoken = max_ntoken, show_summary = show_summary) + cat("With Token Variables.\n") +} + make_tokenvars <- function(unclassed_x) { output <- list() for (i in seq_along(unclassed_x)) { @@ -74,6 +94,5 @@ pp <- function(x, max_ndoc = quanteda::quanteda_options("print_tokens_max_ndoc") if (is.null(attr(x, "tokenvars"))) { print(x, max_ndoc = max_ndoc, max_ntoken = max_ntoken, show_summary = show_summary, ...) return(invisible(NULL)) - } - + } } diff --git a/README.Rmd b/README.Rmd index 22c4263..f5dc470 100644 --- a/README.Rmd +++ b/README.Rmd @@ -50,6 +50,8 @@ tokenvars(tok) ## nothing to see here ```{r example3} tokenvars(tok, "tag") <- list(c("NNP", "VBZ", "JJ", "IN", "JJ", "JJ", "NN", "NN", "."), c("NNP", ".", "NNP", "VBD", "CD", "NNS", "IN", "NNP", "NNP", ".")) +tokenvars(tok, "lemma") <- list(c("spaCy", "be", "great", "at", "fast", "natural", "language", "processing", "."), + c("Mr", ".", "Smith", "spend", "two", "year", "in", "North", "Carolina", ".")) ``` ```{r example4} @@ -61,5 +63,5 @@ tokenvars(tok, field = "tag") ``` ```{r example6} -tokenvars(tok, field = "tag", docid = "d1") +tokenvars(tok, field = "lemma", docid = "d2") ``` diff --git a/README.md b/README.md index 233481a..ae46733 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,9 @@ tok #> #> d2 : #> [1] "Mr" "." "Smith" "spent" "two" "years" -#> [7] "in" "North" "Carolina" "." +#> [7] "in" "North" "Carolina" "." +#> +#> With Token Variables. ``` ``` r @@ -57,34 +59,36 @@ tokenvars(tok) ## nothing to see here ``` r tokenvars(tok, "tag") <- list(c("NNP", "VBZ", "JJ", "IN", "JJ", "JJ", "NN", "NN", "."), c("NNP", ".", "NNP", "VBD", "CD", "NNS", "IN", "NNP", "NNP", ".")) +tokenvars(tok, "lemma") <- list(c("spaCy", "be", "great", "at", "fast", "natural", "language", "processing", "."), + c("Mr", ".", "Smith", "spend", "two", "year", "in", "North", "Carolina", ".")) ``` ``` r tokenvars(tok) #> $d1 -#> tag -#> 1 NNP -#> 2 VBZ -#> 3 JJ -#> 4 IN -#> 5 JJ -#> 6 JJ -#> 7 NN -#> 8 NN -#> 9 . +#> tag lemma +#> 1 NNP spaCy +#> 2 VBZ be +#> 3 JJ great +#> 4 IN at +#> 5 JJ fast +#> 6 JJ natural +#> 7 NN language +#> 8 NN processing +#> 9 . . #> #> $d2 -#> tag -#> 1 NNP -#> 2 . -#> 3 NNP -#> 4 VBD -#> 5 CD -#> 6 NNS -#> 7 IN -#> 8 NNP -#> 9 NNP -#> 10 . +#> tag lemma +#> 1 NNP Mr +#> 2 . . +#> 3 NNP Smith +#> 4 VBD spend +#> 5 CD two +#> 6 NNS year +#> 7 IN in +#> 8 NNP North +#> 9 NNP Carolina +#> 10 . . ``` ``` r @@ -97,7 +101,8 @@ tokenvars(tok, field = "tag") ``` ``` r -tokenvars(tok, field = "tag", docid = "d1") -#> $d1 -#> [1] "NNP" "VBZ" "JJ" "IN" "JJ" "JJ" "NN" "NN" "." +tokenvars(tok, field = "lemma", docid = "d2") +#> $d2 +#> [1] "Mr" "." "Smith" "spend" "two" "year" +#> [7] "in" "North" "Carolina" "." ```