Skip to content
This repository has been archived by the owner on Feb 11, 2024. It is now read-only.

Commit

Permalink
Add convert ref #15
Browse files Browse the repository at this point in the history
  • Loading branch information
chainsawriot committed Nov 15, 2023
1 parent c7e314d commit 6f8e470
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 0 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Generated by roxygen2: do not edit by hand

S3method(convert,tokens_with_proximity)
S3method(dfm,tokens_with_proximity)
S3method(print,tokens_with_proximity)
export(tokens_proximity)
importFrom(quanteda,convert)
importFrom(quanteda,dfm)
18 changes: 18 additions & 0 deletions R/get_dist.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ tokens_proximity <- function(x, keywords, get_min = TRUE, valuetype = c("glob",
return(toks)
}

# Internal helper: build the long-format data frame for a single document.
#
# tokens_obj    the tokens of one document
# proximity_obj the proximity values of the same document; supplied as a
#               column alongside the tokens
# doc_id        the document identifier, repeated once per token
#
# Returns a data.frame with the columns `doc_id`, `token` and `proximity`.
.convert_df <- function(tokens_obj, proximity_obj, doc_id) {
  n_tokens <- length(tokens_obj)
  data.frame(
    doc_id = rep(doc_id, n_tokens),
    token = tokens_obj,
    proximity = proximity_obj
  )
}

#' @method print tokens_with_proximity
#' @export
print.tokens_with_proximity <- function(x, ...) {
Expand All @@ -88,6 +94,18 @@ print.tokens_with_proximity <- function(x, ...) {
cat("keywords: ", quanteda::meta(x, field = "keywords"), "\n")
}

#' Convert a tokens_with_proximity object to a data frame
#'
#' Builds one data frame per document with `.convert_df()` and row-binds
#' them, giving one row per token with the columns `doc_id`, `token` and
#' `proximity`.
#'
#' @param x a `tokens_with_proximity` object.
#' @param to the target format; only `"data.frame"` is supported.
#' @param ... accepted for compatibility with the `quanteda::convert()`
#'   generic; currently not used.
#' @return a data frame with the columns `doc_id`, `token` and `proximity`.
#' @method convert tokens_with_proximity
#' @export
#' @importFrom quanteda convert
convert.tokens_with_proximity <- function(x, to = c("data.frame"), ...) {
  # Only validates `to`; there is a single supported output format.
  to <- match.arg(to)
  # Walk tokens, proximity values and document names in parallel so that
  # every document yields its own data frame.
  per_document <- purrr::pmap(
    list(
      tokens_obj = as.list(x),
      proximity_obj = quanteda::docvars(x, "proximity"),
      doc_id = quanteda::docnames(x)
    ),
    .convert_df
  )
  purrr::list_rbind(per_document)
}

#' Create a document-feature matrix
#'
Expand Down
10 changes: 10 additions & 0 deletions tests/testthat/test-tokens_dist.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ test_that("defensive", {
expect_error(tokens_proximity("a", "a"), "x is not a")
})

test_that("edge cases", {
  # An empty document with an empty keyword must pass through the whole
  # tokens -> proximity -> convert pipeline without raising an error.
  expect_error(convert(tokens_proximity(tokens(""), "")), NA)
})

test_that(".resolve_keywords", {
expect_equal(.resolve_keywords(c("abc", "def"), c("abcd", "defa"), valuetype = "fixed"), c("abc", "def"))
expect_equal(.resolve_keywords(c("abc*", "def*"), c("abcd", "defa"), valuetype = "glob"), c("abcd", "defa"))
Expand All @@ -18,3 +22,9 @@ test_that("count_from", {
"this is my life" %>% tokens() %>% tokens_proximity("my", count_from = -1) %>% docvars("proximity") -> res
expect_equal(res$text1, c(1, 0, -1, 0))
})

test_that("convert", {
  suppressPackageStartupMessages(library(quanteda))
  # Tokenise, annotate with proximity to "my", then convert.
  toks <- tokens("this is my life")
  res <- convert(tokens_proximity(toks, "my"))
  expect_true(is.data.frame(res))
})

0 comments on commit 6f8e470

Please sign in to comment.