Skip to content

Commit 9bf7098

Browse files
authored
Merge pull request #401 from lcdb/universe
Limit background genes in overrepresentation analysis in functional-enrichment.Rmd
2 parents 22c2ac1 + 8f4afce commit 9bf7098

File tree

3 files changed

+57
-3
lines changed

3 files changed

+57
-3
lines changed

lib/lcdbwf/R/functional_enrichment.R

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44
#' @param config Config object
55
#' @param cores Number of cores to run it on
66
#' @param sep Character to separate res_list names
7+
#' @param universe_list List of vectors for background genes
78
#'
89
#' @return nested list of enrichResult objects
910
run_enricher <- function(res_list, ontology_list, config,
10-
cores=1, sep='*'){
11+
cores=1, sep='*', universe_list=universe_list){
1112
# This function supports running in parallel which works best with a flat
1213
# list; however for organizational purposese we want a nested structure. So
1314
# we convert between the two by collapsing nested keys for flat list, and
@@ -42,7 +43,8 @@ run_enricher <- function(res_list, ontology_list, config,
4243
direction=direction,
4344
TERM2GENE=ontology_list[['term2gene']][[ont]],
4445
TERM2NAME=ontology_list[['term2name']][[ont]],
45-
config=config
46+
config=config,
47+
universe=universe_list[[name]]
4648
)
4749
enrich_res
4850
}, BPPARAM=BiocParallel::MulticoreParam(cores))
@@ -86,6 +88,21 @@ collapse_names <- function(res_list, config, sep='*'){
8688
return(names)
8789
}
8890

91+
92+
#' Function to retrieve genes with non-zero raw counts in one or more samples
93+
#'
94+
#' @param dds DESeqDataSet object
95+
#'
96+
#' @return a vector containing gene IDs with raw counts are greater than 0 in one or more samples
97+
nonzero_genes <- function(dds) {
98+
# Extract raw count matrix
99+
counts <- DESeq2::counts(dds, normalized=FALSE)
100+
# Subset rows where rowsums are greater than zero
101+
counts <- counts[rowSums(counts) > 0,]
102+
# Return gene IDs
103+
return(rownames(counts))
104+
}
105+
89106
#' All-in-one enrichment function.
90107
#'
91108
#' Designed to not require an orgdb, and instead requires dataframes of
@@ -101,11 +118,13 @@ collapse_names <- function(res_list, config, sep='*'){
101118
#' lfc_thresh from the config.
102119
#' @param kind One of "OR" for overrepresentation or "GSEA" for gene set
103120
#' enrichment analysis.
121+
#' @param universe background genes. If missing, all genes listed in the database
122+
#' (e.g. TERM2GENE table) will be used as background.
104123
#' @param ... Additional arguments are passed on to enricher() for kind="OR" or
105124
#' GSEA() for kind="GSEA".
106125
#'
107126
#' @return An enrichResults object from
108-
enrich_test <- function(res, TERM2GENE, TERM2NAME, config, direction, kind='OR', ...){
127+
enrich_test <- function(res, TERM2GENE, TERM2NAME, config, direction, kind='OR', universe, ...){
109128

110129
if (is.null(config$main$lfc_thresh)){
111130
lfc_thresh <- 0
@@ -128,6 +147,7 @@ enrich_test <- function(res, TERM2GENE, TERM2NAME, config, direction, kind='OR',
128147
TERM2NAME=TERM2NAME,
129148
pvalueCutoff=config$functional_enrichment$pvalueCutoff,
130149
qvalueCutoff=config$functional_enrichment$qvalueCutoff,
150+
universe=universe,
131151
...
132152
)
133153
} else if (kind == "GSEA"){
@@ -141,6 +161,7 @@ enrich_test <- function(res, TERM2GENE, TERM2NAME, config, direction, kind='OR',
141161
TERM2GENE=TERM2GENE,
142162
TERM2NAME=TERM2NAME,
143163
pvalueCutoff=config$functional_enrichment$pvalueCutoff,
164+
universe=universe,
144165
...
145166
)
146167

workflows/rnaseq/downstream/config.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,3 +236,12 @@ functional_enrichment:
236236
# fine getting plots even with no statistically significant terms.
237237
pvalueCutoff: 1
238238
qvalueCutoff: 1
239+
240+
# This sets the `universe` argument of the `enricher` function to choose an option how to limit
241+
# background genes. Choose one from the following options:
242+
# - exclude_zero_in_all: excludes genes with zero raw count in all samples
243+
# - no_filter: disable any filtering. This option will use all available genes in
244+
# each annotation database. If a user wishes to use custom background genes,
245+
# use this option with `universe_list` set to a named list of background genes,
246+
# where the list names are identical to those of the result list.
247+
limit_background_genes: "exclude_zero_in_all"

workflows/rnaseq/downstream/functional-enrichment.Rmd

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,36 @@ if(!'degpatterns_list' %in% names(obj)){
7171
# obtain ontology information for all ontologies
7272
ontology_list <- lcdbwf:::get_ontology_list(config)
7373
74+
# Define background gene pool for overrepresentation analysis
75+
univ_option <- config$functional_enrichment$limit_background_genes
76+
77+
# Set `universe_list` based on configuration or custom background
78+
if (univ_option == "exclude_zero_in_all") {
79+
# Retrieve all genes detected in one or more samples per contrast
80+
universe_list <- lapply(names(dds_list), function(name) nonzero_genes(dds_list[[name]]))
81+
names(universe_list) <- names(dds_list)
82+
} else if (univ_option == "no_filter") {
83+
########################################################################################
84+
# Here, a user can manually set the `universe_list` to a named list of vectors for #
85+
# custom background genes. Ensure to have names matched between `universe_list` and #
86+
# `res_list`. #
87+
########################################################################################
88+
universe_list <- NULL
89+
# Raise an error if names are unmatched
90+
if (!identical(names(universe_list), names(res_list)) & !is.null(universe_list)) {
91+
stop("universe_list has different names from res_list.")
92+
}
93+
} else {
94+
stop("Correct your background option in the config.yaml file.")
95+
}
7496
```
7597

98+
7699
```{r enrich, cache=TRUE, eval=config$toggle$functional_enrichment, config=c(config$main, config$functional_enrichment), dependson='functional_enrichment_prep'}
77100
enrich_list <- lcdbwf:::run_enricher(res_list=res_list,
78101
ontology_list=ontology_list,
79102
config=config,
103+
universe_list=universe_list,
80104
cores=cores, sep='*')
81105
82106
```

0 commit comments

Comments
 (0)