### findOverlaps_NCList <- IRanges:::findOverlaps_NCList findOverlaps_NCLists <- IRanges:::findOverlaps_NCLists .transpose_hits <- function(hits) { if (is.list(hits)) return(lapply(hits, .transpose_hits)) t(hits) } ### Used in the unit tests for GNCList located in GenomicRanges. .compare_hits <- function(target, current) { if (is.list(target) || is(target, "List") && is.list(current) || is(current, "List")) return(all(mapply(.compare_hits, target, current))) identical(.transpose_hits(target), .transpose_hits(current)) } ### Used in the unit tests for GNCList located in GenomicRanges. .make_Hits_from_q2s <- function(q2s, s_len) { q_hits <- rep.int(seq_along(q2s), elementNROWS(q2s)) s_hits <- as.integer(unlist(q2s, use.names=FALSE)) Hits(q_hits, s_hits, length(q2s), s_len, sort.by.query=TRUE) } .make_Hits_from_s2q <- function(s2q, q_len) .transpose_hits(.make_Hits_from_q2s(s2q, q_len)) .select_hits <- function(x, select) { if (is.list(x)) return(lapply(x, .select_hits, select)) selectHits(x, select) } ### Vectorized. Return -1 if the query and subject overlap (i.e. if ### end(query) < start(subject) and end(subject) < start(query) are both ### false). Otherwise (i.e. if they are disjoint), return the width of the ### gap between them. Note that a gap width of 0 means that they are adjacent. ### TODO: Rename this pgapWidth(), make it a generic with various methods ### (at least one for IntegerRanges and one for GenomicRanges objects), and ### export it. .gapwidth <- function(query, subject) { ifelse(end(query) < start(subject), start(subject) - end(query), ifelse(end(subject) < start(query), start(query) - end(subject), 0L)) - 1L } ### Vectorized. ### TODO: Rename this poverlapWidth(), make it a generic with various methods ### (at least one for IntegerRanges and one for GenomicRanges objects), and ### export it. .overlapwidth <- function(query, subject) { score <- pmin.int(end(query), end(subject)) - pmax.int(start(query), start(subject)) + 1L pmax.int(score, 0L) } ### Used in the unit tests for GNCList located in GenomicRanges. .get_query_overlaps <- function(query, subject, maxgap=-1L, minoverlap=0L, type=c("any", "start", "end", "within", "extend", "equal")) { type <- match.arg(type) if (type == "any" && maxgap != -1L && minoverlap != 0L) stop("when 'type' is \"any\", at least one of 'maxgap' ", "and 'minoverlap' must be set to its default value") overlapwidth <- .overlapwidth(query, subject) ok <- overlapwidth >= minoverlap if (type == "any") { gapwidth <- .gapwidth(query, subject) ok <- ok & gapwidth <= maxgap return(ok) } if (maxgap == -1L) maxgap <- 0L if (type != "end") d1 <- abs(start(subject) - start(query)) if (type != "start") d2 <- abs(end(subject) - end(query)) if (type == "start") return(ok & d1 <= maxgap) if (type == "end") return(ok & d2 <= maxgap) if (type == "equal") return(ok & d1 <= maxgap & d2 <= maxgap) if (type == "within") { ok2 <- start(query) >= start(subject) & end(query) <= end(subject) } else { # type == "extend" ok2 <- start(query) <= start(subject) & end(query) >= end(subject) } ok <- ok & ok2 if (maxgap > 0L) ok <- ok & (d1 + d2) <= maxgap ok } .findOverlaps_naive <- function(query, subject, maxgap=-1L, minoverlap=0L, type=c("any", "start", "end", "within", "extend", "equal"), select=c("all", "first", "last", "arbitrary", "count")) { type <- match.arg(type) select <- match.arg(select) hits_per_query <- lapply(seq_along(query), function(i) which(.get_query_overlaps(query[i], subject, maxgap=maxgap, minoverlap=minoverlap, type=type))) hits <- .make_Hits_from_q2s(hits_per_query, length(subject)) selectHits(hits, select=select) } test_NCList <- function() { x <- IRanges(rep.int(1:6, 6:1), c(0:5, 1:5, 2:5, 3:5, 4:5, 5), names=LETTERS[1:21]) mcols(x) <- DataFrame(score=seq(0.7, by=0.045, length.out=21)) nclist <- NCList(x) checkTrue(is(nclist, "NCList")) checkTrue(validObject(nclist, complete=TRUE)) checkIdentical(x, ranges(nclist, use.mcols=TRUE)) checkIdentical(length(x), length(nclist)) checkIdentical(names(x), names(nclist)) checkIdentical(start(x), start(nclist)) checkIdentical(end(x), end(nclist)) checkIdentical(width(x), width(nclist)) checkIdentical(x, as(nclist, "IRanges")) checkIdentical(x[-6], as(nclist[-6], "IRanges")) } ### Test findOverlaps_NCList() *default* behavior, that is, with all optional ### arguments (i.e. 'maxgap', 'minoverlap', 'type', 'select', and ### 'circle.length') set to their default value. test_findOverlaps_NCList <- function() { query <- IRanges(-3:7, width=3) subject <- IRanges(rep.int(1:6, 6:1), c(0:5, 1:5, 2:5, 3:5, 4:5, 5)) target0 <- .findOverlaps_naive(query, subject) current <- findOverlaps_NCList(query, NCList(subject)) checkTrue(.compare_hits(target0, current)) current <- findOverlaps_NCList(NCList(query), subject) checkTrue(.compare_hits(target0, current)) current <- findOverlaps_NCList(query, subject) checkTrue(.compare_hits(target0, current)) ## Shuffle query and/or subject elements. permute_input <- function(q_perm, s_perm) { q_revperm <- integer(length(q_perm)) q_revperm[q_perm] <- seq_along(q_perm) s_revperm <- integer(length(s_perm)) s_revperm[s_perm] <- seq_along(s_perm) target <- remapHits(target0, Lnodes.remapping=q_revperm, new.nLnode=length(q_perm), Rnodes.remapping=s_revperm, new.nRnode=length(s_perm)) current <- findOverlaps_NCList(query[q_perm], NCList(subject[s_perm])) checkTrue(.compare_hits(target, current)) current <- findOverlaps_NCList(NCList(query[q_perm]), subject[s_perm]) checkTrue(.compare_hits(target, current)) current <- findOverlaps_NCList(query[q_perm], subject[s_perm]) checkTrue(.compare_hits(target, current)) } q_perm <- rev(seq_along(query)) s_perm <- rev(seq_along(subject)) permute_input(q_perm, seq_along(subject)) # reverse query permute_input(seq_along(query), s_perm) # reverse subject permute_input(q_perm, s_perm) # reverse both set.seed(97) for (i in 1:33) { ## random permutations q_perm <- sample(length(query)) s_perm <- sample(length(subject)) permute_input(q_perm, seq_along(subject)) permute_input(seq_along(query), s_perm) permute_input(q_perm, s_perm) } } test_findOverlaps_NCList_with_filtering <- function() { query <- IRanges(-3:7, width=3) subject <- IRanges(rep.int(1:6, 6:1), c(0:5, 1:5, 2:5, 3:5, 4:5, 5)) pp_query <- NCList(query) pp_subject <- NCList(subject) for (type in c("any", "start", "end", "within", "extend", "equal")) { for (maxgap in -1:3) { if (type != "any" || maxgap == -1L) max_minoverlap <- 4L else max_minoverlap <- 0L for (minoverlap in 0:max_minoverlap) { for (select in c("all", "first", "last", "count")) { ## query - subject target <- .findOverlaps_naive(query, subject, maxgap=maxgap, minoverlap=minoverlap, type=type, select=select) current <- findOverlaps_NCList(query, pp_subject, maxgap=maxgap, minoverlap=minoverlap, type=type, select=select) checkTrue(.compare_hits(target, current)) current <- findOverlaps_NCList(pp_query, subject, maxgap=maxgap, minoverlap=minoverlap, type=type, select=select) checkTrue(.compare_hits(target, current)) current <- findOverlaps_NCList(query, subject, maxgap=maxgap, minoverlap=minoverlap, type=type, select=select) checkTrue(.compare_hits(target, current)) ## subject - query target <- .findOverlaps_naive(subject, query, maxgap=maxgap, minoverlap=minoverlap, type=type, select=select) current <- findOverlaps_NCList(pp_subject, query, maxgap=maxgap, minoverlap=minoverlap, type=type, select=select) checkTrue(.compare_hits(target, current)) current <- findOverlaps_NCList(subject, pp_query, maxgap=maxgap, minoverlap=minoverlap, type=type, select=select) checkTrue(.compare_hits(target, current)) current <- findOverlaps_NCList(subject, query, maxgap=maxgap, minoverlap=minoverlap, type=type, select=select) checkTrue(.compare_hits(target, current)) ## subject - subject target <- .findOverlaps_naive(subject, subject, maxgap=maxgap, minoverlap=minoverlap, type=type, select=select) current <- findOverlaps_NCList(pp_subject, subject, maxgap=maxgap, minoverlap=minoverlap, type=type, select=select) checkTrue(.compare_hits(target, current)) current <- findOverlaps_NCList(subject, pp_subject, maxgap=maxgap, minoverlap=minoverlap, type=type, select=select) checkTrue(.compare_hits(target, current)) current <- findOverlaps_NCList(subject, subject, maxgap=maxgap, minoverlap=minoverlap, type=type, select=select) checkTrue(.compare_hits(target, current)) } } } } } ### Only test "start" and "end" types at the moment. test_findOverlaps_NCList_special_types <- function() { x <- IRanges(10, 10) x1 <- IRanges(10, 9) y1 <- IRanges(start=c(7, 7, 13, 13), width=c(2, 0, 2, 0)) stopifnot(all(abs(start(x) - start(y1)) == 3L)) stopifnot(all(abs(start(x1) - start(y1)) == 3L)) x2 <- IRanges(11, 10) y2 <- IRanges(end=c(7, 7, 13, 13), width=c(2, 0, 2, 0)) stopifnot(all(abs(end(x) - end(y2)) == 3L)) stopifnot(all(abs(end(x2) - end(y2)) == 3L)) test_maxgap_and_type <- function(maxgap, minoverlap, nhit) { hits <- findOverlaps(x, y1, maxgap=maxgap, minoverlap=minoverlap, type="start") checkEquals(nhit, length(hits)) hits <- findOverlaps(y1, x, maxgap=maxgap, minoverlap=minoverlap, type="start") checkEquals(nhit, length(hits)) hits <- findOverlaps(x1, y1, maxgap=maxgap, minoverlap=minoverlap, type="start") checkEquals(nhit, length(hits)) hits <- findOverlaps(y1, x1, maxgap=maxgap, minoverlap=minoverlap, type="start") checkEquals(nhit, length(hits)) hits <- findOverlaps(x, y2, maxgap=maxgap, minoverlap=minoverlap, type="end") checkEquals(nhit, length(hits)) hits <- findOverlaps(y2, x, maxgap=maxgap, minoverlap=minoverlap, type="end") checkEquals(nhit, length(hits)) hits <- findOverlaps(x2, y2, maxgap=maxgap, minoverlap=minoverlap, type="end") checkEquals(nhit, length(hits)) hits <- findOverlaps(y2, x2, maxgap=maxgap, minoverlap=minoverlap, type="end") checkEquals(nhit, length(hits)) } ## no hits for (maxgap in -1:2) { test_maxgap_and_type(maxgap, minoverlap=1L, 0L) test_maxgap_and_type(maxgap, minoverlap=0L, 0L) } for (maxgap in 3:5) { ## no hits test_maxgap_and_type(maxgap, minoverlap=1L, 0L) ## 4 hits test_maxgap_and_type(maxgap, minoverlap=0L, 4L) } } .test_arbitrary_selection <- function(query, subject) { pp_query <- NCList(query) pp_subject <- NCList(subject) for (type in c("any", "start", "end", "within", "extend", "equal")) { for (maxgap in -1:3) { if (type != "any" || maxgap == -1L) max_minoverlap <- 4L else max_minoverlap <- 0L for (minoverlap in 0:max_minoverlap) { target <- as(.findOverlaps_naive(query, subject, maxgap=maxgap, minoverlap=minoverlap, type=type, select="all"), "CompressedIntegerList") target_idx0 <- elementNROWS(target) == 0L check_arbitrary_hits <- function(current) { current_idx0 <- is.na(current) checkIdentical(target_idx0, current_idx0) current <- as(current, "CompressedIntegerList") checkTrue(all(current_idx0 | as.logical(current %in% target))) } current <- findOverlaps_NCList(query, pp_subject, maxgap=maxgap, minoverlap=minoverlap, type=type, select="arbitrary") check_arbitrary_hits(current) current <- findOverlaps_NCList(pp_query, subject, maxgap=maxgap, minoverlap=minoverlap, type=type, select="arbitrary") check_arbitrary_hits(current) current <- findOverlaps_NCList(query, subject, maxgap=maxgap, minoverlap=minoverlap, type=type, select="arbitrary") check_arbitrary_hits(current) } } } } test_findOverlaps_NCList_arbitrary <- function() { query <- IRanges(4:3, 6) subject <- IRanges(2:4, 10) .test_arbitrary_selection(query, subject) query <- IRanges(-3:7, width=3) subject <- IRanges(rep.int(1:6, 6:1), c(0:5, 1:5, 2:5, 3:5, 4:5, 5)) .test_arbitrary_selection(query, subject) } .test_circularity <- function(query0, subject0, circle_length, target0, pp, findOverlaps_pp, type) { for (i in -2:2) { query <- shift(query0, shift=i*circle_length) pp_query <- pp(query, circle.length=circle_length) for (j in -2:2) { subject <- shift(subject0, shift=j*circle_length) pp_subject <- pp(subject, circle.length=circle_length) for (select in c("all", "first", "last", "count")) { target <- .select_hits(target0, select=select) current <- findOverlaps_pp(query, pp_subject, type=type, select=select, circle.length=circle_length) checkTrue(.compare_hits(target, current)) current <- findOverlaps_pp(pp_query, subject, type=type, select=select, circle.length=circle_length) checkTrue(.compare_hits(target, current)) current <- findOverlaps_pp(query, subject, type=type, select=select, circle.length=circle_length) checkTrue(.compare_hits(target, current)) target <- .select_hits(.transpose_hits(target0), select=select) current <- findOverlaps_pp(pp_subject, query, type=type, select=select, circle.length=circle_length) checkTrue(.compare_hits(target, current)) current <- findOverlaps_pp(subject, pp_query, type=type, select=select, circle.length=circle_length) checkTrue(.compare_hits(target, current)) current <- findOverlaps_pp(subject, query, type=type, select=select, circle.length=circle_length) checkTrue(.compare_hits(target, current)) } } } } test_findOverlaps_NCList_with_circular_space <- function() { query <- IRanges(-2:17, width=3) subject <- IRanges(c(4, -1, 599), c(7, 0, 999)) circle_length <- 10L ## type "any" s2q <- list(c(5:10, 15:20L), c(1:3, 10:13, 20L), 1:20) target <- .make_Hits_from_s2q(s2q, length(query)) .test_circularity(query, subject, circle_length, target, NCList, findOverlaps_NCList, "any") ## type "start" s2q <- lapply(start(subject), function(s) which((start(query) - s) %% circle_length == 0L)) target <- .make_Hits_from_s2q(s2q, length(query)) .test_circularity(query, subject, circle_length, target, NCList, findOverlaps_NCList, "start") ## type "end" s2q <- lapply(end(subject), function(e) which((end(query) - e) %% circle_length == 0L)) target <- .make_Hits_from_s2q(s2q, length(query)) .test_circularity(query, subject, circle_length, target, NCList, findOverlaps_NCList, "end") } test_NCLists <- function() { x1 <- IRanges(-3:7, width=3) x2 <- IRanges() x3 <- IRanges(rep.int(1:6, 6:1), c(0:5, 1:5, 2:5, 3:5, 4:5, 5)) x <- IRangesList(x1=x1, x2=x2, x3=x3) mcols(x) <- DataFrame(label=c("first", "second", "third")) nclists <- NCLists(x) checkTrue(is(nclists, "NCLists")) checkTrue(validObject(nclists, complete=TRUE)) checkIdentical(x, ranges(nclists, use.mcols=TRUE)) checkIdentical(length(x), length(nclists)) checkIdentical(names(x), names(nclists)) checkIdentical(start(x), start(nclists)) checkIdentical(end(x), end(nclists)) checkIdentical(width(x), width(nclists)) checkIdentical(x, as(nclists, "IRangesList")) checkIdentical(x[-1], as(nclists[-1], "IRangesList")) checkIdentical(elementNROWS(x), elementNROWS(nclists)) nclist <- nclists[[3]] checkTrue(is(nclist, "NCList")) checkTrue(validObject(nclist, complete=TRUE)) checkIdentical(x3, as(nclist, "IRanges")) } test_findOverlaps_NCLists <- function() { ir1 <- IRanges(-3:7, width=3) ir2 <- IRanges(rep.int(1:6, 6:1), c(0:5, 1:5, 2:5, 3:5, 4:5, 5)) target0 <- mapply(findOverlaps_NCList, list(ir1, ir2), list(ir2, ir1)) for (compress in c(TRUE, FALSE)) { query <- IRangesList(ir1, ir2, IRanges(2, 7), compress=compress) pp_query <- NCLists(query) subject <- IRangesList(ir2, ir1, compress=compress) pp_subject <- NCLists(subject) for (select in c("all", "first", "last", "count")) { target <- .select_hits(target0, select=select) current <- findOverlaps_NCLists(query, pp_subject, select=select) checkTrue(.compare_hits(target, current)) current <- findOverlaps_NCLists(pp_query, subject, select=select) checkTrue(.compare_hits(target, current)) current <- findOverlaps_NCLists(query, subject, select=select) checkTrue(.compare_hits(target, current)) } } } test_findOverlaps_NCLists_with_circular_space <- function() { query1 <- IRanges(-2:17, width=3) subject1 <- IRanges(c(4, -1, 599), c(7, 0, 999)) query <- IRangesList(query1, IRanges(), subject1) subject <- IRangesList(subject1, IRanges(), query1) circle_length <- c(10L, NA_integer_, 10L) s2q <- list(c(5:10, 15:20L), c(1:3, 10:13, 20L), 1:20) target1 <- .make_Hits_from_s2q(s2q, length(query1)) target2 <- .make_Hits_from_s2q(list(), 0) target3 <- .transpose_hits(target1) target <- list(target1, target2, target3) .test_circularity(query, subject, circle_length, target, NCLists, findOverlaps_NCLists, "any") }