diff --git a/NAMESPACE b/NAMESPACE
index fb00d0a..fcb0cf9 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(convert,tokens_with_proximity)
 S3method(dfm,tokens_with_proximity)
 S3method(print,tokens_with_proximity)
 export(tokens_proximity)
+importFrom(quanteda,convert)
 importFrom(quanteda,dfm)
diff --git a/R/get_dist.R b/R/get_dist.R
index 942afa4..c344888 100644
--- a/R/get_dist.R
+++ b/R/get_dist.R
@@ -78,6 +78,15 @@ tokens_proximity <- function(x, keywords, get_min = TRUE, valuetype = c("glob",
     return(toks)
 }
 
+## Build the long-format data frame for one document: one row per token, with
+## the document id repeated next to each token and its proximity value.
+.convert_df <- function(tokens_obj, proximity_obj, doc_id) {
+    data.frame("doc_id" = rep(doc_id, length(tokens_obj)),
+               "token" = tokens_obj,
+               "proximity" = proximity_obj,
+               stringsAsFactors = FALSE)
+}
+
 #' @method print tokens_with_proximity
 #' @export
 print.tokens_with_proximity <- function(x, ...) {
@@ -88,6 +97,21 @@ print.tokens_with_proximity <- function(x, ...) {
     cat("keywords: ", quanteda::meta(x, field = "keywords"), "\n")
 }
 
+#' @method convert tokens_with_proximity
+#' @export
+#' @importFrom quanteda convert
+convert.tokens_with_proximity <- function(x, to = c("data.frame"), ...) {
+    ## `...` is unused; it is accepted only so the signature stays consistent
+    ## with the quanteda::convert() generic (S3 generic/method consistency).
+    to <- match.arg(to)
+    ## One data frame per document, row-bound into a single long data frame.
+    purrr::list_rbind(
+        purrr::pmap(list(tokens_obj = as.list(x),
+                         proximity_obj = quanteda::docvars(x, "proximity"),
+                         doc_id = quanteda::docnames(x)),
+                    .convert_df)
+    )
+}
 
 #' Create a document-feature matrix
 #'
diff --git a/tests/testthat/test-tokens_dist.R b/tests/testthat/test-tokens_dist.R
index f61fad9..eefd613 100644
--- a/tests/testthat/test-tokens_dist.R
+++ b/tests/testthat/test-tokens_dist.R
@@ -2,6 +2,10 @@ test_that("defensive", {
     expect_error(tokens_proximity("a", "a"), "x is not a")
 })
 
+test_that("edge cases", {
+    expect_error("" %>% tokens() %>% tokens_proximity("") %>% convert(), NA)
+})
+
 test_that(".resolve_keywords", {
     expect_equal(.resolve_keywords(c("abc", "def"), c("abcd", "defa"), valuetype = "fixed"), c("abc", "def"))
     expect_equal(.resolve_keywords(c("abc*", "def*"), c("abcd", "defa"), valuetype = "glob"), c("abcd", "defa"))
@@ -18,3 +22,11 @@ test_that("count_from", {
     "this is my life" %>% tokens() %>% tokens_proximity("my", count_from = -1) %>% docvars("proximity") -> res
     expect_equal(res$text1, c(1, 0, -1, 0))
 })
+
+test_that("convert", {
+    suppressPackageStartupMessages(library(quanteda))
+    "this is my life" %>% tokens() %>% tokens_proximity("my") %>% convert() -> res
+    expect_true(is.data.frame(res))
+    expect_equal(colnames(res), c("doc_id", "token", "proximity"))
+    expect_equal(nrow(res), 4)
+})