From 4d0f4a67bdfe7cf56aa4beea1e77f4bf616e7761 Mon Sep 17 00:00:00 2001 From: Nathan LeRoy Date: Thu, 28 Oct 2021 18:11:07 -0400 Subject: [PATCH 1/2] add nearest-genes function --- R/nearest-genes.R | 34 +++++++++++++++++++ ...tNeighborDist.Rd => plotNeighborDist 2.Rd} | 0 2 files changed, 34 insertions(+) create mode 100644 R/nearest-genes.R rename man/{plotNeighborDist.Rd => plotNeighborDist 2.Rd} (100%) diff --git a/R/nearest-genes.R b/R/nearest-genes.R new file mode 100644 index 00000000..03059082 --- /dev/null +++ b/R/nearest-genes.R @@ -0,0 +1,34 @@ +#' Given a query and set of annotations, this function will calculate +#' the nearest annotation to each region in the region set, as well +#' as the nearest gene type and the distance to the nearest gene. +#' +#' @param query A GRanges or GRangesList object with query sets +#' @param annotation A GRanges or GRangesList object with annotation sets +#' +#' @return A data table that contains observations for each genomic region +#' and the associated aforementioned annotations. 
+#' @export +#' @examples +#' queryFile = system.file("extdata", "vistaEnhancers.bed.gz", package="GenomicDistributions") +#' query = rtracklayer::import(queryFile) +#' data(TSS_hg19) +#' +#' queryAnnotated = calcNearestGenes(query, TSS_hg19) +calcNearestGenes = function(query, annotation, gene_name_key="gene_id", gene_type_key="gene_biotype") { + .validateInputs(list(query=c("GRanges","GRangesList"))) + + # index (into annotation) of the annotation nearest to each query region + nearestIds = nearest(query, annotation) + + # annotate nearest gene and type, read from the metadata columns named by gene_name_key and gene_type_key + query$nearest_gene = mcols(annotation[nearestIds])[[gene_name_key]] + query$nearest_gene_type = mcols(annotation[nearestIds])[[gene_type_key]] + + # annotate on the distance to that nearest annotation as well + query$nearest_distance = distance(query, annotation[nearestIds]) + + # dump the annotated query to a data table and return + dt = grToDt(query) + + return(dt) +} diff --git a/man/plotNeighborDist.Rd b/man/plotNeighborDist 2.Rd similarity index 100% rename from man/plotNeighborDist.Rd rename to man/plotNeighborDist 2.Rd From 2ef6e47870f71b7a187e31e4931faf8769dd69d6 Mon Sep 17 00:00:00 2001 From: Kristyna Kupkova Date: Tue, 30 Nov 2021 18:38:13 -0500 Subject: [PATCH 2/2] updated documentation --- DESCRIPTION | 2 +- NAMESPACE | 1 + man/calcNearestGenes.Rd | 36 +++++++++++++++++++ ...tNeighborDist 2.Rd => plotNeighborDist.Rd} | 0 4 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 man/calcNearestGenes.Rd rename man/{plotNeighborDist 2.Rd => plotNeighborDist.Rd} (100%) diff --git a/DESCRIPTION b/DESCRIPTION index 5cf35106..5121a776 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -57,7 +57,7 @@ VignetteBuilder: knitr License: BSD_2_clause + file LICENSE biocViews: Software, GenomeAnnotation, GenomeAssembly, DataRepresentation, Sequencing, Coverage, FunctionalGenomics, Visualization -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 URL: http://code.databio.org/GenomicDistributions BugReports: http://github.com/databio/GenomicDistributions Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE 
index 13b88766..93375a3b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(calcFeatureDist) export(calcFeatureDistRefTSS) export(calcGCContent) export(calcGCContentRef) +export(calcNearestGenes) export(calcNearestNeighbors) export(calcNeighborDist) export(calcOpenSignal) diff --git a/man/calcNearestGenes.Rd b/man/calcNearestGenes.Rd new file mode 100644 index 00000000..99e31e3d --- /dev/null +++ b/man/calcNearestGenes.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nearest-genes.R +\name{calcNearestGenes} +\alias{calcNearestGenes} +\title{Given a query and set of annotations, this function will calculate +the nearest annotation to each region in the region set, as well +as the nearest gene type and the distance to the nearest gene.} +\usage{ +calcNearestGenes( + query, + annotation, + gene_name_key = "gene_id", + gene_type_key = "gene_biotype" +) +} +\arguments{ +\item{query}{A GRanges or GRangesList object with query sets} + +\item{annotation}{A GRanges or GRangesList object with annotation sets} +} +\value{ +A data table that contains observations for each genomic region + and the associated aforementioned annotations. +} +\description{ +Given a query and set of annotations, this function will calculate +the nearest annotation to each region in the region set, as well +as the nearest gene type and the distance to the nearest gene. +} +\examples{ +queryFile = system.file("extdata", "vistaEnhancers.bed.gz", package="GenomicDistributions") +query = rtracklayer::import(queryFile) +data(TSS_hg19) + +queryAnnotated = calcNearestGenes(query, TSS_hg19) +} diff --git a/man/plotNeighborDist 2.Rd b/man/plotNeighborDist.Rd similarity index 100% rename from man/plotNeighborDist 2.Rd rename to man/plotNeighborDist.Rd