Skip to content

Commit

Permalink
Merge pull request #70 from ihmeuw-demographics/feature/all_overlappi…
Browse files Browse the repository at this point in the history
…ng_intervals

option to identify all possible overlapping intervals
  • Loading branch information
chacalle authored Sep 20, 2021
2 parents 1481caa + abf946f commit 79f88bf
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 25 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Suggests:
ggplot2,
scales
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1
RoxygenNote: 7.1.2
VignetteBuilder: knitr
Language: en-US
Depends: R (>= 2.10)
Expand Down
36 changes: 24 additions & 12 deletions R/interval_assertions.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ identify_missing_intervals <- function(ints_dt, expected_ints_dt) {
#' overlapping intervals.
#'
#' @inheritParams identify_missing_intervals
#' @param identify_all_possible \[`logical(1)`\]\cr
#' Whether to return all overlapping intervals ('TRUE') or try to identify just
#' the less granular interval ('FALSE'). Default is 'FALSE'. Useful when it may
#' not be clear what is the less granular interval.
#'
#' @return `identify_overlapping_intervals` returns a \[`data.table()`\] with
#' columns for the 'start' and 'end' of the overlapping intervals. If no
Expand All @@ -93,10 +97,11 @@ identify_missing_intervals <- function(ints_dt, expected_ints_dt) {
#'
#' @examples
#' ints_dt <- data.table::data.table(
#' start = c(seq(0, 95, 5), 0),
#' end = c(seq(5, 95, 5), Inf, Inf)
#' start = c(seq(10, 50, 5), 0),
#' end = c(seq(15, 55, 5), 11)
#' )
#' overlapping_dt <- identify_overlapping_intervals(ints_dt)
#' overlapping_dt <- identify_overlapping_intervals(ints_dt, identify_all_possible = FALSE)
#' overlapping_dt <- identify_overlapping_intervals(ints_dt, identify_all_possible = TRUE)
#'
#'
#' @export
Expand All @@ -115,7 +120,7 @@ assert_no_overlapping_intervals <- function(ints_dt) {

#' @export
#' @rdname overlapping_intervals
identify_overlapping_intervals <- function(ints_dt) {
identify_overlapping_intervals <- function(ints_dt, identify_all_possible = FALSE) {

assertthat::assert_that(
assertive::is_data.table(ints_dt),
Expand All @@ -133,20 +138,27 @@ identify_overlapping_intervals <- function(ints_dt) {
overlaps <- intervals::interval_overlap(ints, ints)
names(overlaps) <- 1:length(overlaps)

# remove self match only
overlaps <- overlaps[sapply(overlaps, function(i) length(i) > 1)]

# sort by number of intervals that each interval overlaps with so that we can
# identify the largest overlapping intervals first
overlaps <- overlaps[order(sapply(overlaps, length), decreasing=T)]

overlapping_indices <- c()
for (i in names(overlaps)) {
# remove match to itself
overlaps[[i]] <- overlaps[[i]][overlaps[[i]] != i]
if (identify_all_possible) {
overlapping_indices <- names(overlaps)
} else {
overlapping_indices <- c()
for (i in names(overlaps)) {
# remove match to itself
overlaps[[i]] <- overlaps[[i]][overlaps[[i]] != i]

# remove indices of overlapping intervals that have already been identified
overlaps[[i]] <- overlaps[[i]][!overlaps[[i]] %in% overlapping_indices]
# remove indices of overlapping intervals that have already been identified
overlaps[[i]] <- overlaps[[i]][!overlaps[[i]] %in% overlapping_indices]

if (length(overlaps[[i]]) > 0) {
overlapping_indices <- c(overlapping_indices, i)
if (length(overlaps[[i]]) > 0) {
overlapping_indices <- c(overlapping_indices, i)
}
}
}
overlapping_ints <- ints[as.integer(overlapping_indices)]
Expand Down
2 changes: 1 addition & 1 deletion R/interval_collapse.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ collapse_common_intervals <- function(dt,
# check for overlapping intervals
if (overlapping_dt_severity != "skip") {
overlapping_dt <- dt[
, identify_overlapping_intervals(unique(.SD)),
, identify_overlapping_intervals(unique(.SD), identify_all_possible = overlapping_dt_severity != "none"),
.SDcols = cols, by = by_id_cols
]
data.table::setnames(overlapping_dt, c("start", "end"), cols)
Expand Down
14 changes: 10 additions & 4 deletions man/overlapping_intervals.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 7 additions & 7 deletions tests/testthat/test-interval_assertions.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,19 @@ ints_dt <- data.table(

testthat::test_that("missing intervals are identified correctly", {


testthat::expect_silent(
assert_no_overlapping_intervals(ints_dt)
)

expected_overlapping_dt <- data.table(
start = c(15),
end = c(60)
)

expected_overlapping_dt <- data.table(start = c(15), end = c(60))
ints_dt <- rbind(ints_dt, expected_overlapping_dt)

overlapping_dt <- identify_overlapping_intervals(ints_dt)
overlapping_dt <- identify_overlapping_intervals(ints_dt, identify_all_possible = FALSE)
setkeyv(expected_overlapping_dt, c("start", "end"))
testthat::expect_equal(overlapping_dt, expected_overlapping_dt)

overlapping_dt <- identify_overlapping_intervals(ints_dt, identify_all_possible = TRUE)
expected_overlapping_dt <- ints_dt[start >= 15 & end <= 60]
setkeyv(expected_overlapping_dt, c("start", "end"))
testthat::expect_equal(overlapping_dt, expected_overlapping_dt)

Expand Down

0 comments on commit 79f88bf

Please sign in to comment.