Skip to content

Commit

Permalink
Create own class #2 (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
chainsawriot authored Nov 26, 2023
1 parent d28dcde commit 90e45b1
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 29 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Generated by roxygen2: do not edit by hand

S3method(as.tokens,tokens_with_tokenvars)
S3method(print,tokens_with_tokenvars)
export("tokenvars<-")
export(tokens_add_tokenvars)
export(tokenvars)
importFrom(quanteda,as.tokens)
25 changes: 22 additions & 3 deletions R/tokenvars.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,30 @@ tokens_add_tokenvars <- function(x) {
unclassed_x <- unclass(x)
unclassed_x <- add_tokenid(unclassed_x)
attr(unclassed_x, "tokenvars") <- make_tokenvars(unclassed_x)
class(unclassed_x) <- c("tokens")
class(unclassed_x) <- c("tokens_with_tokenvars")
return(unclassed_x)
}

#' Coerce a `tokens_with_tokenvars` object to a `tokens` object
#'
#' Replaces the class attribute of `x` with `"tokens"`. By default the
#' `"tokenvars"` attribute is dropped as well.
#'
#' @param x an object of class `tokens_with_tokenvars`.
#' @param remove_tokenvars logical; if `TRUE` (the default) the `"tokenvars"`
#'   attribute is removed from the result, if `FALSE` it is left in place.
#' @param ... not used by this method.
#' @return `x` with its class set to `"tokens"`.
#' @importFrom quanteda as.tokens
#' @method as.tokens tokens_with_tokenvars
#' @export
as.tokens.tokens_with_tokenvars <- function(x, remove_tokenvars = TRUE, ...) {
  result <- x
  class(result) <- "tokens"
  if (remove_tokenvars) {
    # Assigning NULL deletes the attribute.
    attr(result, "tokenvars") <- NULL
  }
  result
}

#' Print a `tokens_with_tokenvars` object
#'
#' Converts `x` with `as.tokens()` (keeping the `"tokenvars"` attribute),
#' prints the converted object, and then writes the line
#' "With Token Variables.".
#'
#' @param x an object of class `tokens_with_tokenvars`.
#' @param max_ndoc,max_ntoken,show_summary passed on to the `print()` call for
#'   the converted object; the defaults are read from
#'   `quanteda::quanteda_options()`.
#' @param ... further arguments forwarded to the same `print()` call.
#' @return `x`, invisibly, as is conventional for print methods.
#' @export
print.tokens_with_tokenvars <- function(x, max_ndoc = quanteda::quanteda_options("print_tokens_max_ndoc"),
                                        max_ntoken = quanteda::quanteda_options("print_tokens_max_ntoken"),
                                        show_summary = quanteda::quanteda_options("print_tokens_summary"), ...) {
  ## TODO
  print(
    as.tokens(x, remove_tokenvars = FALSE),
    max_ndoc = max_ndoc,
    max_ntoken = max_ntoken,
    show_summary = show_summary,
    ...
  )
  cat("With Token Variables.\n")
  # Return the printed object rather than cat()'s NULL so that
  # `y <- print(x)` and piped calls keep the object.
  invisible(x)
}

make_tokenvars <- function(unclassed_x) {
output <- list()
for (i in seq_along(unclassed_x)) {
Expand All @@ -74,6 +94,5 @@ pp <- function(x, max_ndoc = quanteda::quanteda_options("print_tokens_max_ndoc")
if (is.null(attr(x, "tokenvars"))) {
print(x, max_ndoc = max_ndoc, max_ntoken = max_ntoken, show_summary = show_summary, ...)
return(invisible(NULL))
}

}
}
4 changes: 3 additions & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ tokenvars(tok) ## nothing to see here
```{r example3}
tokenvars(tok, "tag") <- list(c("NNP", "VBZ", "JJ", "IN", "JJ", "JJ", "NN", "NN", "."),
c("NNP", ".", "NNP", "VBD", "CD", "NNS", "IN", "NNP", "NNP", "."))
tokenvars(tok, "lemma") <- list(c("spaCy", "be", "great", "at", "fast", "natural", "language", "processing", "."),
c("Mr", ".", "Smith", "spend", "two", "year", "in", "North", "Carolina", "."))
```

```{r example4}
Expand All @@ -61,5 +63,5 @@ tokenvars(tok, field = "tag")
```

```{r example6}
tokenvars(tok, field = "tag", docid = "d1")
tokenvars(tok, field = "lemma", docid = "d2")
```
55 changes: 30 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ tok
#>
#> d2 :
#> [1] "Mr" "." "Smith" "spent" "two" "years"
#> [7] "in" "North" "Carolina" "."
#> [7] "in" "North" "Carolina" "."
#>
#> With Token Variables.
```

``` r
Expand All @@ -57,34 +59,36 @@ tokenvars(tok) ## nothing to see here
``` r
tokenvars(tok, "tag") <- list(c("NNP", "VBZ", "JJ", "IN", "JJ", "JJ", "NN", "NN", "."),
c("NNP", ".", "NNP", "VBD", "CD", "NNS", "IN", "NNP", "NNP", "."))
tokenvars(tok, "lemma") <- list(c("spaCy", "be", "great", "at", "fast", "natural", "language", "processing", "."),
c("Mr", ".", "Smith", "spend", "two", "year", "in", "North", "Carolina", "."))
```

``` r
tokenvars(tok)
#> $d1
#> tag
#> 1 NNP
#> 2 VBZ
#> 3 JJ
#> 4 IN
#> 5 JJ
#> 6 JJ
#> 7 NN
#> 8 NN
#> 9 .
#> tag lemma
#> 1 NNP spaCy
#> 2 VBZ be
#> 3 JJ great
#> 4 IN at
#> 5 JJ fast
#> 6 JJ natural
#> 7 NN language
#> 8 NN processing
#> 9 . .
#>
#> $d2
#> tag
#> 1 NNP
#> 2 .
#> 3 NNP
#> 4 VBD
#> 5 CD
#> 6 NNS
#> 7 IN
#> 8 NNP
#> 9 NNP
#> 10 .
#> tag lemma
#> 1 NNP Mr
#> 2 . .
#> 3 NNP Smith
#> 4 VBD spend
#> 5 CD two
#> 6 NNS year
#> 7 IN in
#> 8 NNP North
#> 9 NNP Carolina
#> 10 . .
```

``` r
Expand All @@ -97,7 +101,8 @@ tokenvars(tok, field = "tag")
```

``` r
tokenvars(tok, field = "tag", docid = "d1")
#> $d1
#> [1] "NNP" "VBZ" "JJ" "IN" "JJ" "JJ" "NN" "NN" "."
tokenvars(tok, field = "lemma", docid = "d2")
#> $d2
#> [1] "Mr" "." "Smith" "spend" "two" "year"
#> [7] "in" "North" "Carolina" "."
```

0 comments on commit 90e45b1

Please sign in to comment.