Create own class #2 (#5)

gesistsa · Nov 26, 2023 · 90e45b1 · 90e45b1
1 parent d28dcde
commit 90e45b1
Show file tree

Hide file tree

Showing 4 changed files with 58 additions and 29 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(as.tokens,tokens_with_tokenvars)
+S3method(print,tokens_with_tokenvars)
 export("tokenvars<-")
 export(tokens_add_tokenvars)
 export(tokenvars)
+importFrom(quanteda,as.tokens)
diff --git a/R/tokenvars.R b/R/tokenvars.R
@@ -44,10 +44,30 @@ tokens_add_tokenvars <- function(x) {
     unclassed_x <- unclass(x)
     unclassed_x <- add_tokenid(unclassed_x)
     attr(unclassed_x, "tokenvars") <- make_tokenvars(unclassed_x)
-    class(unclassed_x) <- c("tokens")
+    class(unclassed_x) <- c("tokens_with_tokenvars")
     return(unclassed_x)
 }
 
+#' Convert a `tokens_with_tokenvars` object to a plain `tokens` object
+#'
+#' @param x a `tokens_with_tokenvars` object.
+#' @param remove_tokenvars logical; if `TRUE` (the default), the `tokenvars`
+#'   attribute is dropped from the result, otherwise it is kept.
+#' @param ... not used by this method.
+#' @return `x` with its class set to `"tokens"`.
+#' @importFrom quanteda as.tokens
+#' @method as.tokens tokens_with_tokenvars
+#' @export
+as.tokens.tokens_with_tokenvars <- function(x, remove_tokenvars = TRUE, ...) {
+    plain_tokens <- x
+    if (remove_tokenvars) {
+        ## assigning NULL removes the attribute
+        attr(plain_tokens, "tokenvars") <- NULL
+    }
+    ## a single class, so the tokens_with_tokenvars methods no longer apply
+    class(plain_tokens) <- "tokens"
+    plain_tokens
+}
+
+#' Print a `tokens_with_tokenvars` object
+#'
+#' Converts `x` to class `"tokens"` (keeping the `tokenvars` attribute),
+#' prints the result and then appends a line stating that token variables
+#' are attached.
+#'
+#' @param x a `tokens_with_tokenvars` object.
+#' @param max_ndoc,max_ntoken,show_summary passed on to the `print()` call
+#'   for the converted object; the defaults are read from
+#'   `quanteda::quanteda_options()`.
+#' @param ... further arguments passed on to the `print()` call for the
+#'   converted object.
+#' @return `x`, invisibly.
+#' @export
+print.tokens_with_tokenvars <- function(x, max_ndoc = quanteda::quanteda_options("print_tokens_max_ndoc"),
+               max_ntoken = quanteda::quanteda_options("print_tokens_max_ntoken"),
+               show_summary = quanteda::quanteda_options("print_tokens_summary"), ...) {
+    ## TODO
+    ## Only the class is changed here (remove_tokenvars = FALSE), so that
+    ## print() no longer dispatches back to this method.
+    print(as.tokens(x, remove_tokenvars = FALSE), max_ndoc = max_ndoc, max_ntoken = max_ntoken,
+          show_summary = show_summary, ...)
+    cat("With Token Variables.\n")
+    ## print methods conventionally return their argument invisibly
+    invisible(x)
+}
+
 make_tokenvars <- function(unclassed_x) {
     output <- list()
     for (i in seq_along(unclassed_x)) {
@@ -74,6 +94,5 @@ pp <- function(x, max_ndoc = quanteda::quanteda_options("print_tokens_max_ndoc")
     if (is.null(attr(x, "tokenvars"))) {
         print(x, max_ndoc = max_ndoc, max_ntoken = max_ntoken, show_summary = show_summary, ...)
         return(invisible(NULL))
-    }
-
+    }    
 }
diff --git a/README.Rmd b/README.Rmd
@@ -50,6 +50,8 @@ tokenvars(tok) ## nothing to see here
 ```{r example3}
 tokenvars(tok, "tag") <- list(c("NNP", "VBZ", "JJ", "IN", "JJ", "JJ", "NN", "NN", "."),
                               c("NNP", ".", "NNP", "VBD", "CD", "NNS", "IN", "NNP", "NNP", "."))
+tokenvars(tok, "lemma") <- list(c("spaCy", "be", "great", "at", "fast", "natural", "language", "processing", "."),
+                                c("Mr", ".", "Smith", "spend", "two", "year", "in", "North", "Carolina", "."))
 ```
 
 ```{r example4}
@@ -61,5 +63,5 @@ tokenvars(tok, field = "tag")
 ```
 
 ```{r example6}
-tokenvars(tok, field = "tag", docid = "d1")
+tokenvars(tok, field = "lemma", docid = "d2")
 ```
diff --git a/README.md b/README.md
@@ -42,7 +42,9 @@ tok
 #> 
 #> d2 :
 #>  [1] "Mr"       "."        "Smith"    "spent"    "two"      "years"   
-#>  [7] "in"       "North"    "Carolina" "."
+#>  [7] "in"       "North"    "Carolina" "."       
+#> 
+#> With Token Variables.
 ```
 
 ``` r
@@ -57,34 +59,36 @@ tokenvars(tok) ## nothing to see here
 ``` r
 tokenvars(tok, "tag") <- list(c("NNP", "VBZ", "JJ", "IN", "JJ", "JJ", "NN", "NN", "."),
                               c("NNP", ".", "NNP", "VBD", "CD", "NNS", "IN", "NNP", "NNP", "."))
+tokenvars(tok, "lemma") <- list(c("spaCy", "be", "great", "at", "fast", "natural", "language", "processing", "."),
+                                c("Mr", ".", "Smith", "spend", "two", "year", "in", "North", "Carolina", "."))
 ```
 
 ``` r
 tokenvars(tok)
 #> $d1
-#>   tag
-#> 1 NNP
-#> 2 VBZ
-#> 3  JJ
-#> 4  IN
-#> 5  JJ
-#> 6  JJ
-#> 7  NN
-#> 8  NN
-#> 9   .
+#>   tag      lemma
+#> 1 NNP      spaCy
+#> 2 VBZ         be
+#> 3  JJ      great
+#> 4  IN         at
+#> 5  JJ       fast
+#> 6  JJ    natural
+#> 7  NN   language
+#> 8  NN processing
+#> 9   .          .
 #> 
 #> $d2
-#>    tag
-#> 1  NNP
-#> 2    .
-#> 3  NNP
-#> 4  VBD
-#> 5   CD
-#> 6  NNS
-#> 7   IN
-#> 8  NNP
-#> 9  NNP
-#> 10   .
+#>    tag    lemma
+#> 1  NNP       Mr
+#> 2    .        .
+#> 3  NNP    Smith
+#> 4  VBD    spend
+#> 5   CD      two
+#> 6  NNS     year
+#> 7   IN       in
+#> 8  NNP    North
+#> 9  NNP Carolina
+#> 10   .        .
 ```
 
 ``` r
@@ -97,7 +101,8 @@ tokenvars(tok, field = "tag")
 ```
 
 ``` r
-tokenvars(tok, field = "tag", docid = "d1")
-#> $d1
-#> [1] "NNP" "VBZ" "JJ"  "IN"  "JJ"  "JJ"  "NN"  "NN"  "."
+tokenvars(tok, field = "lemma", docid = "d2")
+#> $d2
+#>  [1] "Mr"       "."        "Smith"    "spend"    "two"      "year"    
+#>  [7] "in"       "North"    "Carolina" "."
 ```