Skip to content

Commit e6e14a3

Browse files
authored
Merge pull request #396 from lcdb/LRT
Implemented LRT functionality
2 parents 9bf7098 + c19c271 commit e6e14a3

File tree

15 files changed

+796
-120
lines changed

15 files changed

+796
-120
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,7 @@ workflows/rnaseq/downstream/results
6262
workflows/rnaseq/downstream/rnaseq_cache
6363
workflows/rnaseq/downstream/rnaseq_files
6464
workflows/rnaseq/downstream/rnaseq.html
65+
*.xlsx
66+
._*
67+
Rplots.pdf
68+
/lib/include/*

docs/rnaseq-rmd.rst

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ Here is how the code above would look using this method:
208208
subset.counts=TRUE))
209209
)
210210
211-
dds.list <- map(lst, lcdbwf::make_dds, config=config, parallel=config$parallel$parallel)
211+
dds_list <- map(lst, lcdbwf::make_dds, config=config, parallel=config$parallel$parallel)
212212
213213
That is, first we create a list of lists (``lst``), and then we used ``map()`` to apply
214214
the ``make_dds`` function to all items in the list. The collapsing of
@@ -260,6 +260,24 @@ adds some extra convenience when working with lists of dds objects, including
260260
the detection of parallelization as set up in the config object. See the help
261261
for ``lcdbwf::make_results()`` for more details.
262262

263+
By default, if no test argument is specified in the parameters for
264+
``lcdbwf::make_dds`` found here in `examples 1-4, <https://github.com/lcdb/lcdb-wf/blob/LRT/workflows/rnaseq/downstream/rnaseq.Rmd#L164-L187>`_
265+
the Wald test is performed. When ``lcdbwf::make_results`` processes a Wald test dds object, it
266+
detects the Wald test and expects a ``contrast`` or ``coef`` argument to specify which
267+
p-values and log2FoldChange values to report.
268+
269+
DESeq2 also supports the nBinomLRT (LRT). `Example 5 <https://github.com/lcdb/lcdb-wf/blob/LRT/workflows/rnaseq/downstream/rnaseq.Rmd#L189-L194>`_
270+
demonstrates how to create a dds object with LRT data. Since the LRT tests
271+
the removal of one or more terms from the design formula, a single
272+
log2FoldChange column doesn't reflect the test's complexity. DESeq2's results
273+
object is optimized for the Wald test, and when storing LRT results, it
274+
maintains consistency in datastructure by choosing a single pair-wise comparison for
275+
log2FoldChange values. To avoid confusion, ***we set all log2FoldChange values to
276+
0 for LRT results***.
277+
278+
For more details, see the DESeq2 documentation:
279+
`DESeq2 Likelihood Ratio Test <https://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#i-ran-a-likelihood-ratio-test-but-results-only-gives-me-one-comparison>`_.
280+
263281
.. _rules:
264282

265283
To take advantage of this infrastructure, we put each of those contrasts into

lib/lcdbwf/R/annotations.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ get_annotation_hub <- function(config, localHub=NULL, force=NULL, cache=NULL){
2626
}
2727

2828
ah <- AnnotationHub::AnnotationHub(
29-
hub=getAnnotationHubOption('URL'),
29+
hub=AnnotationHub::getAnnotationHubOption('URL'),
3030
proxy=proxy,
3131
localHub=localHub,
3232
cache=cache
@@ -95,7 +95,7 @@ get_annotation_db <- function(config, dbtype, genus_species=NULL, orgdb_key_over
9595
)
9696
}
9797

98-
hits <- mcols(ah.query) %>%
98+
hits <- SummarizedExperiment::mcols(ah.query) %>%
9999
as.data.frame() %>%
100100
dplyr::arrange(desc(rdatadateadded)) %>%
101101
dplyr::filter(rdataclass==dbtype)

lib/lcdbwf/R/contrasts.R

Lines changed: 89 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ dds_coefs <- function(dds, ..., expand=FALSE){
100100
}
101101

102102

103-
#' Convenience function for building contrasts
103+
#' Convenience function for building contrasts
104104
#'
105105
#' @description
106106
#'
@@ -125,6 +125,9 @@ dds_coefs <- function(dds, ..., expand=FALSE){
125125
#' @param label Label to describe this contrast which will be used in headings.
126126
#' @param dds_list List of dds objects. If NULL, then look in the global
127127
#' environment for an object called "dds_list" and use that.
128+
#' @param type Type of shrinkage for use by lfcShrink(). If no type is given,
129+
#' we use the current DESeq2 default `type` argument for lfcShrink. If
130+
#' NULL is given, we skip lfcShrink() altogether and directly return the object from results().
128131
#' @param ... Additional arguments are passed to results() and lfcShrink(). If
129132
#' "parallel" is not explicitly specified here, then look in the global env for
130133
#' a variable called "config" and find the parallel config setting from there.
@@ -154,48 +157,78 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){
154157
if (is.null(parallel)) parallel <- FALSE
155158
}
156159

157-
# Modify the args based on what we detected. Note that results() expects the
158-
dots['parallel'] <- parallel
160+
# Modify the args based on what we detected.
161+
dots[['parallel']] <- parallel
159162

160163
# Note that results() expects the argument to be called 'object' rather than,
161164
# say, 'dds'.
162165
dots[['object']] <- dds
163166

167+
# Ensure any provided `test` argument is consistent with the dds object provided.
168+
# This uses names from mcols(dds) to detect how the dds object was created.
169+
test_detected <- FALSE
170+
if ('test' %in% names(dots)) {
171+
if ((dots$test == 'Wald' && any(grepl('LRT', names(S4Vectors::mcols(dds))))) ||
172+
(dots$test == 'LRT' && any(grepl('Wald', names(S4Vectors::mcols(dds)))))) {
173+
stop("The 'test' passed to make_results does not match the detected test type in dds")
174+
}
175+
} else {
176+
if (any(grepl('LRT', names(S4Vectors::mcols(dds))))) {
177+
dots$test <- 'LRT'
178+
test_detected <- TRUE
179+
} else if (any(grepl('Wald', names(S4Vectors::mcols(dds))))) {
180+
dots$test <- 'Wald'
181+
test_detected <- TRUE
182+
} else {
183+
stop("test type was missing from make_results call and could not be detected from dds")
184+
}
185+
}
186+
187+
# Set the current default for 'type' from DESeq2 for lfcShrink if 'type' was not provided.
188+
# This inspects the function definition of lfcShrink to see what the current default is
189+
# (we have have seen it change before, hence the check).
190+
if (!'type' %in% names(dots)) {
191+
dots$type <- eval(formals(DESeq2::lfcShrink)$type)[1]
192+
}
193+
164194
# Call results() with the subset of dots that it accepts.
165-
results_dots <- lcdbwf:::match_from_dots(dots, results)
166-
res <- do.call("results", results_dots)
167-
168-
# We're about to call lfcShrink, but it needs the res object...so inject the
169-
# one we just made into dots.
170-
dots[['res']] <- res
171-
172-
# lfcShrink also needs the dds object, so inject that too
173-
dots[['dds']] <- dds
174-
175-
lfcShrink_dots <- lcdbwf:::match_from_dots(dots, lfcShrink)
176-
res <- do.call("lfcShrink", lfcShrink_dots)
177-
178-
# Add the shrinkage type to the metadata of the results object.
179-
#
180-
# If "type" was specified when calling this function, it's easy and we use
181-
# that. Otherwise, if it was not specified then DESeq2 used the default.
182-
# Since that default can change as we have seen in the past, we need to
183-
# inspect the lfcShrink function itself to see what the current default is,
184-
# and use that.
185-
shrinkage_type <- dots[['type']]
186-
if (is.null(shrinkage_type)){
187-
# The definition of lfcShrink has a character vector as the type argument,
188-
# and we want to extract the first thing in that vector. But formals()
189-
# return strings, so we need to eval that string to convert it to
190-
# a character vector such that we can extract the first thing.
191-
#
192-
# In recent versions this should evaluate to "apeglm". But this way we
193-
# are inspecting the function itself if it ever changes.
194-
shrinkage_type <- eval(formals(DESeq2::lfcShrink)$type)[1]
195+
results_dots <- lcdbwf:::match_from_dots(dots, DESeq2::results)
196+
res <- do.call(DESeq2::results, results_dots)
197+
198+
# When make_results is called with 'test' set to 'LRT',
199+
# or when make_results is called with 'test' missing but the
200+
# DDS object contains the LRT, we convert all values in the log2FoldChange
201+
# column of the DESeqResults object to 0. LFC values only make sense to report for a single
202+
# comparison of two sample groups. This only applies to the Wald test.
203+
# LRT is instead performing a test of the removal of one or more factor(s) from the design formula.
204+
# DESeq2 reports log2FoldChange values for a single pair-wise comparison when test == 'LRT'. This
205+
# can be misleading and so this is our solution.
206+
207+
# Adjust log2FoldChange for LRT test
208+
if (!is.null(dots$test) && dots$test == 'LRT') {
209+
res$log2FoldChange <- 0
210+
warning("All log2FoldChange values in the DESeq2 results object have been set to 0. See https://github.com/lcdb/lcdb-wf/blob/master/docs/rnaseq-rmd.rst?plain=1#L269.")
195211
}
196212

197-
# Add to results object so we can report it out later.
198-
metadata(res)$type <- shrinkage_type
213+
# Checks for LRT test and non-NULL type
214+
if (!is.null(dots$type) && !is.null(dots$test) && dots$test == 'LRT' && !test_detected) {
215+
stop("You cannot pass a non-NULL or missing type to make_results with test == 'LRT'. For LRT, LFC values are set to 0 and should not be passed to lfcShrink. Use type == NULL in make_results for LRT DDS objects.")
216+
} else if (!is.null(dots$type) && !is.null(dots$test) && dots$test == 'LRT' && test_detected) {
217+
stop("You cannot pass a non-NULL or missing type to make_results with an LRT dds object. For LRT, LFC values are set to 0 and should not be passed to lfcShrink. Use type == NULL in make_results for LRT DDS objects.")
218+
}
219+
220+
# Call lfcShrink if applicable
221+
if (!is.null(dots$type) && dots$test != 'LRT') {
222+
dots[['res']] <- res
223+
dots[['dds']] <- dds
224+
225+
lfcShrink_dots <- lcdbwf:::match_from_dots(dots, DESeq2::lfcShrink)
226+
res <- do.call(DESeq2::lfcShrink, lfcShrink_dots)
227+
228+
S4Vectors::metadata(res)$type <- dots$type
229+
} else {
230+
S4Vectors::metadata(res)$type <- NULL
231+
}
199232

200233
return(
201234
list(
@@ -206,33 +239,36 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){
206239
)
207240
}
208241

209-
210242
results_diagnostics <- function(res, dds, name, config, text){
211243
lcdbwf:::mdcat('### Other diagnostics')
212244
print(knitr::kable(lcdbwf:::my_summary(res, dds, name)))
213245

214246
lcdbwf:::folded_markdown(text$results_diagnostics$filter_ma, "Help")
215-
filterThreshold <- metadata(res)$filterThreshold
216-
p <- ggplot(res %>% as.data.frame() %>% mutate(filtered=res$baseMean < filterThreshold)) +
217-
aes(x=log10(baseMean), y=log2FoldChange, color=filtered) +
218-
geom_point()
219-
print(p)
247+
filterThreshold <- S4Vectors::metadata(res)$filterThreshold
248+
p1 <- ggplot2::ggplot(res %>% as.data.frame() %>% dplyr::mutate(filtered=res$baseMean < filterThreshold)) +
249+
ggplot2::aes(x=log10(baseMean), y=log2FoldChange, color=filtered) +
250+
ggplot2::geom_point()
251+
print(p1)
220252

221253
lcdbwf:::folded_markdown(text$results_diagnostics$outlier_ma, "Help")
222-
p <- ggplot(res %>% as.data.frame() %>% mutate(outlier=is.na(res$pvalue))) +
223-
aes(x=log10(baseMean), y=log2FoldChange, color=outlier) +
224-
geom_point()
225-
print(p)
254+
p2 <- ggplot2::ggplot(res %>% as.data.frame() %>% dplyr::mutate(outlier=is.na(res$pvalue))) +
255+
ggplot2::aes(x=log10(baseMean), y=log2FoldChange, color=outlier) +
256+
ggplot2::geom_point()
257+
print(p2)
226258

227259
lcdbwf:::folded_markdown(text$results_diagnostics$lfcse_basemean, "Help")
228-
p <- ggplot(res %>% as.data.frame() %>% mutate(outlier=is.na(res$pvalue))) +
229-
aes(x=log10(baseMean), y=lfcSE, color=outlier) +
230-
geom_point()
231-
print(p)
260+
p3 <- ggplot2::ggplot(res %>% as.data.frame() %>% dplyr::mutate(outlier=is.na(res$pvalue))) +
261+
ggplot2::aes(x=log10(baseMean), y=lfcSE, color=outlier) +
262+
ggplot2::geom_point()
263+
print(p3)
232264

233265
lcdbwf:::folded_markdown(text$results_diagnostics$lfcse_lfc, "Help")
234-
p <- ggplot(res %>% as.data.frame() %>% mutate(outlier=is.na(res$pvalue))) +
235-
aes(x=log2FoldChange, y=lfcSE, color=outlier) +
236-
geom_point()
237-
print(p)
266+
p4 <- ggplot2::ggplot(res %>% as.data.frame() %>% dplyr::mutate(outlier=is.na(res$pvalue))) +
267+
ggplot2::aes(x=log2FoldChange, y=lfcSE, color=outlier) +
268+
ggplot2::geom_point()
269+
print(p4)
270+
271+
# Save plots to a list and return for testing
272+
plots <- list(p1=p1, p2=p2, p3=p3, p4=p4)
273+
return(plots)
238274
}

lib/lcdbwf/R/dds.R

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,17 @@ make_dds <- function(design_data, config=NULL, collapse_by=NULL,
5252
...){
5353

5454
# Note we're using pluck() here for the convenience of setting defaults
55-
5655

5756
coldata <- purrr::pluck(design_data, 'sampletable')
5857
design <- purrr::pluck(design_data, 'design')
58+
test <- purrr::pluck(design_data, 'test', .default='Wald')
59+
if (!(test %in% c('Wald', 'LRT'))){
60+
stop(paste("Valid options for test are 'Wald' (default) or 'LRT'. You chose,", test))
61+
}
62+
reduced_design <- purrr::pluck(design_data, 'reduced_design')
63+
if (!is.null(reduced_design) && test != 'LRT') {
64+
stop("You included a reduced design formula but did not specify test = 'LRT'")
65+
}
5966
location <- purrr::pluck(design_data, 'filename', .default=featureCounts)
6067
salmon <- purrr::pluck(design_data, 'salmon')
6168
kallisto <- purrr::pluck(design_data, 'kallisto')
@@ -121,8 +128,17 @@ make_dds <- function(design_data, config=NULL, collapse_by=NULL,
121128
dds <- lcdbwf:::collapseReplicates2(dds, dds[[collapse_by]])
122129
}
123130

124-
dds <- DESeq(dds, ...)
125-
return(dds)
131+
# Check if we need to perform the LRT on the dds object
132+
if (test == 'Wald') {
133+
dds <- DESeq2::DESeq(dds, test=test, ...)
134+
return(dds)
135+
} else if (test == 'LRT') {
136+
if (is.null(reduced_design)){
137+
stop("When using LRT, reduced_design must be provided")
138+
}
139+
dds <- DESeq2::DESeq(dds, test=test, reduced=reduced_design, ...)
140+
return(dds)
141+
}
126142
}
127143

128144
#' Strip dotted version off of the rownames of a dds object
@@ -160,7 +176,7 @@ strip_dotted_version_from_dds <- function(dds, force=FALSE){
160176
#' biological replicate (e.g., dds$biorep)
161177
collapseReplicates2 <- function(object, groupby){
162178
collapsed <- DESeq2::collapseReplicates(object, groupby)
163-
colData(collapsed)[,1] <- rownames(colData(collapsed))
179+
SummarizedExperiment::colData(collapsed)[,1] <- rownames(SummarizedExperiment::colData(collapsed))
164180
return(collapsed)
165181
}
166182

@@ -190,21 +206,21 @@ dds_diagnostics <- function(dds_list, text){
190206
p <- assays(dds_list[[name]])[['cooks']] %>%
191207
as.data.frame() %>%
192208
tidyr::pivot_longer(everything()) %>%
193-
ggplot() +
194-
aes(x=name, y=log10(value)) +
195-
geom_boxplot() +
196-
ylab("log10(Cook's distance)")
209+
ggplot2::ggplot() +
210+
ggplot2::aes(x=name, y=log10(value)) +
211+
ggplot2::geom_boxplot() +
212+
ggplot2::ylab("log10(Cook's distance)")
197213
print(p)
198214

199215
mdcat("#### colData")
200216
mdcat(text$dds_diagnostics$colData)
201-
cdata <- colData(dds_list[[name]]) %>% as.data.frame
217+
cdata <- SummarizedExperiment::colData(dds_list[[name]]) %>% as.data.frame
202218
cdata <- cdata[, !grepl("filename", colnames(cdata))]
203219
print(htmltools::tagList(datatable(cdata)))
204220

205221
mdcat("#### Design matrix")
206222
mdcat(text$dds_diagnostics$design_matrix, " The design is: `", deparse(design(dds_list[[name]])), "`")
207-
mmat <- model.matrix(design(dds_list[[name]]), data=colData(dds_list[[name]])) %>% as.data.frame()
223+
mmat <- model.matrix(design(dds_list[[name]]), data=SummarizedExperiment::colData(dds_list[[name]])) %>% as.data.frame()
208224
print(htmltools::tagList(datatable(mmat)))
209225
}
210226
}

0 commit comments

Comments
 (0)