diff --git a/DESCRIPTION b/DESCRIPTION
index 5cf3510..5121a77 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -57,7 +57,7 @@ VignetteBuilder: knitr
 License: BSD_2_clause + file LICENSE
 biocViews: Software, GenomeAnnotation, GenomeAssembly, DataRepresentation, Sequencing,
     Coverage, FunctionalGenomics, Visualization
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.2
 URL: http://code.databio.org/GenomicDistributions
 BugReports: http://github.com/databio/GenomicDistributions
 Encoding: UTF-8
diff --git a/NAMESPACE b/NAMESPACE
index 13b8876..93375a3 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -15,6 +15,7 @@ export(calcFeatureDist)
 export(calcFeatureDistRefTSS)
 export(calcGCContent)
 export(calcGCContentRef)
+export(calcNearestGenes)
 export(calcNearestNeighbors)
 export(calcNeighborDist)
 export(calcOpenSignal)
diff --git a/R/nearest-genes.R b/R/nearest-genes.R
new file mode 100644
index 0000000..0305908
--- /dev/null
+++ b/R/nearest-genes.R
@@ -0,0 +1,65 @@
+#' Find the nearest annotated gene for each query region
+#'
+#' Given a query and set of annotations, this function will calculate
+#' the nearest annotation to each region in the region set, as well
+#' as the nearest gene type and the distance to the nearest gene.
+#'
+#' @param query A GRanges or GRangesList object with query sets
+#' @param annotations A GRanges object with annotation sets; its metadata
+#' columns must include those named by gene_name_key and gene_type_key
+#' @param gene_name_key Name of the annotation metadata column holding the
+#' gene name
+#' @param gene_type_key Name of the annotation metadata column holding the
+#' gene type
+#'
+#' @return A data table that contains observations for each genomic region
+#' and the associated aforementioned annotations, in the columns
+#' nearest_gene, nearest_gene_type and nearest_distance. Regions for which
+#' no nearest annotation is found get NA in these columns. If query is a
+#' GRangesList, a list with one such data table per region set is returned.
+#' @export
+#' @examples
+#' queryFile = system.file("extdata", "vistaEnhancers.bed.gz", package="GenomicDistributions")
+#' query = rtracklayer::import(queryFile)
+#' data(TSS_hg19)
+#'
+#' queryAnnotated = calcNearestGenes(query, TSS_hg19)
+calcNearestGenes = function(query, annotations, gene_name_key="gene_id",
+                            gene_type_key="gene_biotype") {
+  .validateInputs(list(query=c("GRanges","GRangesList")))
+
+  # a GRangesList is annotated one region set at a time
+  if (methods::is(query, "GRangesList")) {
+    return(lapply(query, calcNearestGenes, annotations=annotations,
+                  gene_name_key=gene_name_key, gene_type_key=gene_type_key))
+  }
+
+  # make sure the requested metadata columns exist in the annotations
+  annotationCols = GenomicRanges::mcols(annotations)
+  missingKeys = setdiff(c(gene_name_key, gene_type_key), names(annotationCols))
+  if (length(missingKeys) > 0) {
+    stop("annotations is missing metadata column(s): ",
+         paste(missingKeys, collapse=", "))
+  }
+
+  # calculate the nearest annotations to given query; the index is NA for
+  # regions where nearest() finds no annotation
+  nearestIds = nearest(query, annotations)
+  hasNearest = !is.na(nearestIds)
+
+  # annotate nearest gene and type; indexing with NA propagates NA
+  query$nearest_gene = annotationCols[[gene_name_key]][nearestIds]
+  query$nearest_gene_type = annotationCols[[gene_type_key]][nearestIds]
+
+  # annotate on the distance as well, skipping regions without a match
+  # because a GRanges object cannot be subset with NA
+  nearestDistance = rep(NA_integer_, length(query))
+  nearestDistance[hasNearest] = distance(query[hasNearest],
+                                         annotations[nearestIds[hasNearest]])
+  query$nearest_distance = nearestDistance
+
+  # dump a query to a data table and return
+  dt = grToDt(query)
+
+  return(dt)
+}
diff --git a/man/calcNearestGenes.Rd b/man/calcNearestGenes.Rd
new file mode 100644
index 0000000..99e31e3
--- /dev/null
+++ b/man/calcNearestGenes.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/nearest-genes.R
+\name{calcNearestGenes}
+\alias{calcNearestGenes}
+\title{Find the nearest annotated gene for each query region}
+\usage{
+calcNearestGenes(
+  query,
+  annotations,
+  gene_name_key = "gene_id",
+  gene_type_key = "gene_biotype"
+)
+}
+\arguments{
+\item{query}{A GRanges or GRangesList object with query sets}
+
+\item{annotations}{A GRanges object with annotation sets; its metadata
+columns must include those named by gene_name_key and gene_type_key}
+
+\item{gene_name_key}{Name of the annotation metadata column holding the
+gene name}
+
+\item{gene_type_key}{Name of the annotation metadata column holding the
+gene type}
+}
+\value{
+A data table that contains observations for each genomic region
+and the associated aforementioned annotations, in the columns
+nearest_gene, nearest_gene_type and nearest_distance. Regions for which
+no nearest annotation is found get NA in these columns. If query is a
+GRangesList, a list with one such data table per region set is returned.
+}
+\description{
+Given a query and set of annotations, this function will calculate
+the nearest annotation to each region in the region set, as well
+as the nearest gene type and the distance to the nearest gene.
+}
+\examples{
+queryFile = system.file("extdata", "vistaEnhancers.bed.gz", package="GenomicDistributions")
+query = rtracklayer::import(queryFile)
+data(TSS_hg19)
+
+queryAnnotated = calcNearestGenes(query, TSS_hg19)
+}