Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion docker/lr-metrics/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ ENV PATH=/miniconda/bin/:/miniconda/envs/lr-metrics/bin/:/root/google-cloud-sdk/

# install conda packages
COPY ./environment.yml /
RUN conda install -n base conda-libmamba-solver && conda config --set solver libmamba
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not familiar with how mamba helps dependencies, in particular where it should be installed.
But if you look at the environment.yml, it's trying to create an env named lr-metrics, and here you're installing mamba into the base env.
Is this usually what people do?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has been reverted, so it's no longer relevant, but here's some info for posterity.
I tried to build this docker on a desktop with 16Gb of memory. No dice. If I was lucky it just got OOM'd after a few hours. So I ran out to microcenter and bought 32Gb of memory. After leaving it overnight it was still trying to solve the environment. Added libmamba and the docker built promptly (a few minutes--don't remember exactly) using little memory. Don't know about the correctness of the environments (but I'd think you'd want the solver in the base environment).

RUN conda env create -f /environment.yml && conda clean -a

# install gatk
# install super-special version of gatk
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've created a ticket for this to help us track #427

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Current version eliminates it.

RUN git clone https://github.com/broadinstitute/gatk.git -b kvg_pbeap \
&& cd gatk \
&& git checkout c9497220ef13beb05da7c7a820c181be00b9b817 \
Expand All @@ -30,6 +31,9 @@ RUN git clone https://github.com/broadinstitute/gatk.git -b kvg_pbeap \
# install picard
RUN wget -O /usr/local/bin/picard.jar https://github.com/broadinstitute/picard/releases/download/2.22.1/picard.jar

# install gsutil
RUN curl -sSL https://sdk.cloud.google.com | bash
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated to this command, just a convention in this repo.

We usually bump the version in the companion make file when the Dockerfile/environment file are changed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted.


# install various metric and visualization scripts
COPY lima_report_detail.R /
COPY lima_report_summary.R /
Expand Down
1 change: 1 addition & 0 deletions docker/lr-metrics/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ channels:
- bioconda
- r
dependencies:
- conda-forge::ncurses
- samtools
- bedtools
- java-jdk
Expand Down
2 changes: 1 addition & 1 deletion docker/lr-mosdepth/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ RUN conda env create -f /environment.yml && conda clean -a
ENV PATH=/opt/conda/envs/lr-mosdepth/bin/:/root/google-cloud-sdk/bin/:${PATH}

RUN apt-get -y update && \
apt-get -y install curl zlib1g-dev libcurl4-openssl-dev libbz2-dev liblzma-dev && \
apt-get -y install curl zlib1g-dev libcurl4-openssl-dev libbz2-dev liblzma-dev datamash bsdmainutils && \
apt-get clean

# install gsutil
Expand Down
7 changes: 7 additions & 0 deletions test/test_data/AlignedMetrics.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"AlignedMetrics.aligned_bam": "gs://broad-dsp-lrma-cromwell/test_data/aligned-metrics/NA24385.bam",
"AlignedMetrics.aligned_bai": "gs://broad-dsp-lrma-cromwell/test_data/aligned-metrics/NA24385.bam.bai",
"AlignedMetrics.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
"AlignedMetrics.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
"AlignedMetrics.gcs_output_dir": "gs://broad-dsp-lrma-ci/test-outputs"
}
2 changes: 1 addition & 1 deletion wdl/deprecated/ONT10x.wdl
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
version 1.0

import "../structs/Structs.wdl"
import "../tasks/Utility/Utils.wdl" as Utils
import "../tasks/Utility/ONTUtils.wdl" as ONT
import "tasks/C3POa.wdl" as C3
import "../tasks/Alignment/AlignReads.wdl" as AR
import "../tasks/QC/AlignedMetrics.wdl" as AM
import "../tasks/Utility/Finalize.wdl" as FF

workflow ONT10x {
Expand Down
1 change: 0 additions & 1 deletion wdl/deprecated/PBCCS10x.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ version 1.0

import "../tasks/Utility/PBUtils.wdl" as PB
import "../tasks/Utility/Utils.wdl" as Utils
import "../tasks/Alignment/AlignReads.wdl" as AR
import "../tasks/QC/AlignedMetrics.wdl" as AM
import "tasks/AnnotateAdapters.wdl" as AA
import "../tasks/Utility/Finalize.wdl" as FF
Expand Down
1 change: 0 additions & 1 deletion wdl/deprecated/PBCCSDemultiplexWholeGenome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ version 1.0

import "../tasks/Utility/PBUtils.wdl" as PB
import "../tasks/Utility/Utils.wdl" as Utils
import "../tasks/Alignment/AlignReads.wdl" as AR
import "../tasks/QC/AlignedMetrics.wdl" as AM
import "../tasks/VariantCalling/CallVariantsPBCCS.wdl" as VAR
import "../tasks/Utility/Finalize.wdl" as FF
Expand Down
1 change: 0 additions & 1 deletion wdl/deprecated/PBCLRDemultiplexWholeGenome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ version 1.0

import "../tasks/Utility/PBUtils.wdl" as PB
import "../tasks/Utility/Utils.wdl" as Utils
import "../tasks/Alignment/AlignReads.wdl" as AR
import "../tasks/QC/AlignedMetrics.wdl" as AM
import "../tasks/Utility/Finalize.wdl" as FF

Expand Down
1 change: 0 additions & 1 deletion wdl/deprecated/tasks/C3POa.wdl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
version 1.0

import "../../structs/Structs.wdl"
import "../../tasks/Utility/Utils.wdl" as Utils

workflow C3POa {
input {
Expand Down
1 change: 1 addition & 0 deletions wdl/deprecated/tasks/GATKBestPractice.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ version 1.0
# "https://github.com/PacificBiosciences/hg002-ccs/"
##########################################################################################

import "../../structs/Structs.wdl"
import "dsde_pipelines_tasks/GermlineVariantDiscovery.wdl" as Calling
import "dsde_pipelines_tasks/Qc.wdl" as QC
import "dsde_pipelines_tasks/Utilities.wdl" as DSDEPipelinesUtils
Expand Down
183 changes: 183 additions & 0 deletions wdl/deprecated/tasks/Scrapbook.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
version 1.0

# Computes per-position depth over one region of a BAM and writes it as a
# bgzip-compressed, tabix-indexed text file.
#
# Inputs:
#   bam, bai               - alignment file and its index (bai is not referenced
#                            in the command itself)
#   chr, start, end        - region, passed to samtools as "chr:start-end"
#   runtime_attr_override  - optional per-field overrides of the defaults below
#
# Outputs:
#   coverage      - "<bam basename>.coverage.<chr>_<start>_<end>.txt.gz"
#   coverage_tbi  - tabix index for the file above
task CoverageTrack {
    input {
        File bam
        File bai
        String chr
        String start
        String end

        RuntimeAttr? runtime_attr_override
    }

    String prefix = basename(bam, ".bam")
    # Single source of truth for the output name; used by the command and the outputs.
    String track = "~{prefix}.coverage.~{chr}_~{start}_~{end}.txt.gz"
    Int disk_size = 2*ceil(size(bam, "GB") + size(bai, "GB"))

    command <<<
        set -euxo pipefail

        # Options come before the input file, matching the documented
        # `samtools depth [options] in.bam` synopsis.
        samtools depth -a -r ~{chr}:~{start}-~{end} ~{bam} | bgzip > ~{track}

        # samtools depth emits three tab-separated columns: chrom, 1-based
        # position, depth. The "bed" preset would treat column 3 (the depth)
        # as the interval end and assume 0-based starts, so index on
        # column 2 as both begin and end instead.
        tabix -s 1 -b 2 -e 2 ~{track}
    >>>

    output {
        File coverage = track
        File coverage_tbi = track + ".tbi"
    }

    #########################
    RuntimeAttr default_attr = object {
        cpu_cores:          1,
        mem_gb:             4,
        disk_gb:            disk_size,
        boot_disk_gb:       10,
        preemptible_tries:  2,
        max_retries:        1,
        docker:             "us.gcr.io/broad-dsp-lrma/lr-metrics:0.1.11"
    }
    # Any field left unset in the override falls back to the default above.
    RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
    runtime {
        cpu:                    select_first([runtime_attr.cpu_cores,         default_attr.cpu_cores])
        memory:                 select_first([runtime_attr.mem_gb,            default_attr.mem_gb]) + " GiB"
        disks: "local-disk " +  select_first([runtime_attr.disk_gb,           default_attr.disk_gb]) + " HDD"
        bootDiskSizeGb:         select_first([runtime_attr.boot_disk_gb,      default_attr.boot_disk_gb])
        preemptible:            select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
        maxRetries:             select_first([runtime_attr.max_retries,       default_attr.max_retries])
        docker:                 select_first([runtime_attr.docker,            default_attr.docker])
    }
}

# Writes a copy of the input BAM restricted to reads that pass
# `samtools view -q 1`, then indexes the result.
#
# Inputs:
#   bam                    - alignment file to filter
#   runtime_attr_override  - optional per-field overrides of the defaults below
#
# Outputs:
#   no_mq0_bam / no_mq0_bai - filtered BAM and its index
task FilterMQ0Reads {
    input {
        File bam

        RuntimeAttr? runtime_attr_override
    }

    String out_stem = basename(bam, ".bam")
    Int scratch_gb = 2*ceil(size(bam, "GB"))

    command <<<
        set -euxo pipefail

        samtools view -q 1 -b ~{bam} > ~{out_stem}.no_mq0.bam
        samtools index ~{out_stem}.no_mq0.bam
    >>>

    output {
        File no_mq0_bam = "~{out_stem}.no_mq0.bam"
        File no_mq0_bai = "~{out_stem}.no_mq0.bam.bai"
    }

    #########################
    RuntimeAttr defaults = object {
        cpu_cores:          1,
        mem_gb:             2,
        disk_gb:            scratch_gb,
        boot_disk_gb:       10,
        preemptible_tries:  2,
        max_retries:        1,
        docker:             "us.gcr.io/broad-dsp-lrma/lr-metrics:0.1.11"
    }
    # Use the caller's override when given; unset fields fall back to defaults.
    RuntimeAttr resolved = select_first([runtime_attr_override, defaults])
    runtime {
        cpu:                    select_first([resolved.cpu_cores,         defaults.cpu_cores])
        memory:                 select_first([resolved.mem_gb,            defaults.mem_gb]) + " GiB"
        disks: "local-disk " +  select_first([resolved.disk_gb,           defaults.disk_gb]) + " HDD"
        bootDiskSizeGb:         select_first([resolved.boot_disk_gb,      defaults.boot_disk_gb])
        preemptible:            select_first([resolved.preemptible_tries, defaults.preemptible_tries])
        maxRetries:             select_first([resolved.max_retries,       defaults.max_retries])
        docker:                 select_first([resolved.docker,            defaults.docker])
    }
}

# Runs `bedtools coverage` with the BAM as -a and the BED as -b, keeps the
# gzipped per-record table, and reduces it to one number.
#
# Inputs:
#   bam, bai               - alignment file and its index (bai is not referenced
#                            in the command itself)
#   bed                    - intervals passed as -b
#   prefix                 - stem for all output file names
#   runtime_attr_override  - optional per-field overrides of the defaults below
#
# Outputs:
#   coverage     - "<prefix>.txt.gz", raw bedtools coverage output
#   counts       - sum over all rows of (column 15 * column 16)
#   counts_file  - "<prefix>.count.txt", the file `counts` is read from
task ComputeBedCoverage {
    input {
        File bam
        File bai
        File bed
        String prefix

        RuntimeAttr? runtime_attr_override
    }

    Int scratch_gb = 2*ceil(size(bam, "GB") + size(bai, "GB") + size(bed, "GB"))

    command <<<
        set -euxo pipefail

        bedtools coverage -b ~{bed} -a ~{bam} -nobuf | gzip > ~{prefix}.txt.gz
        # NOTE(review): columns 15 and 16 are presumably the -a feature length
        # and the covered fraction appended by bedtools coverage - confirm
        # against the bedtools version in the image.
        zcat ~{prefix}.txt.gz | awk '{ sum += sprintf("%f", $15*$16) } END { printf("%f\n", sum) }' > ~{prefix}.count.txt
    >>>

    output {
        File coverage = "~{prefix}.txt.gz"
        Float counts = read_float("~{prefix}.count.txt")
        File counts_file = "~{prefix}.count.txt"
    }

    #########################
    RuntimeAttr defaults = object {
        cpu_cores:          1,
        mem_gb:             2,
        disk_gb:            scratch_gb,
        boot_disk_gb:       10,
        preemptible_tries:  2,
        max_retries:        1,
        docker:             "us.gcr.io/broad-dsp-lrma/lr-metrics:0.1.11"
    }
    # Use the caller's override when given; unset fields fall back to defaults.
    RuntimeAttr resolved = select_first([runtime_attr_override, defaults])
    runtime {
        cpu:                    select_first([resolved.cpu_cores,         defaults.cpu_cores])
        memory:                 select_first([resolved.mem_gb,            defaults.mem_gb]) + " GiB"
        disks: "local-disk " +  select_first([resolved.disk_gb,           defaults.disk_gb]) + " HDD"
        bootDiskSizeGb:         select_first([resolved.boot_disk_gb,      defaults.boot_disk_gb])
        preemptible:            select_first([resolved.preemptible_tries, defaults.preemptible_tries])
        maxRetries:             select_first([resolved.max_retries,       defaults.max_retries])
        docker:                 select_first([resolved.docker,            defaults.docker])
    }
}

# Converts a BAM to BED with `bedtools bamtobed`.
#
# Inputs:
#   bam, bai               - alignment file and its index (bai is not referenced
#                            in the command itself)
#   runtime_attr_override  - optional per-field overrides of the defaults below
#
# Outputs:
#   bedfile - "<bam basename>.bed"
task BamToBed {
    input {
        File bam
        File bai

        RuntimeAttr? runtime_attr_override
    }

    Int scratch_gb = 4*ceil(size(bam, "GB") + size(bai, "GB"))
    String bed_name = basename(bam, ".bam") + ".bed"

    command <<<
        set -euxo pipefail

        bedtools bamtobed -i ~{bam} > ~{bed_name}
    >>>

    output {
        File bedfile = bed_name
    }

    #########################
    RuntimeAttr defaults = object {
        cpu_cores:          2,
        mem_gb:             8,
        disk_gb:            scratch_gb,
        boot_disk_gb:       10,
        preemptible_tries:  2,
        max_retries:        1,
        docker:             "us.gcr.io/broad-dsp-lrma/lr-metrics:0.1.11"
    }
    # Use the caller's override when given; unset fields fall back to defaults.
    RuntimeAttr resolved = select_first([runtime_attr_override, defaults])
    runtime {
        cpu:                    select_first([resolved.cpu_cores,         defaults.cpu_cores])
        memory:                 select_first([resolved.mem_gb,            defaults.mem_gb]) + " GiB"
        disks: "local-disk " +  select_first([resolved.disk_gb,           defaults.disk_gb]) + " HDD"
        bootDiskSizeGb:         select_first([resolved.boot_disk_gb,      defaults.boot_disk_gb])
        preemptible:            select_first([resolved.preemptible_tries, defaults.preemptible_tries])
        maxRetries:             select_first([resolved.max_retries,       defaults.max_retries])
        docker:                 select_first([resolved.docker,            defaults.docker])
    }
}
5 changes: 2 additions & 3 deletions wdl/pipelines/ONT/Epigenomics/ONTMethylation.wdl
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
version 1.0

import "../../../structs/Structs.wdl"
import "../../../tasks/Utility/Utils.wdl" as Utils
import "../../../tasks/Utility/ONTUtils.wdl" as ONTUtils
import "../../../tasks/Utility/VariantUtils.wdl"
import "../../../tasks/Preprocessing/Guppy.wdl" as Guppy
import "../../../tasks/Utility/Finalize.wdl" as FF

workflow ONTMethylation {
Expand Down Expand Up @@ -733,4 +732,4 @@ task CallHaploidVariants {
maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
docker: select_first([runtime_attr.docker, default_attr.docker])
}
}
}
3 changes: 1 addition & 2 deletions wdl/pipelines/ONT/MultiAnalysis/ONTPfHrp2Hrp3Status.wdl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
version 1.0

import "../../../structs/Structs.wdl"
import "../../../tasks/Utility/Finalize.wdl" as FF

workflow ONTPfHrp2Hrp3Status {

Expand Down Expand Up @@ -86,7 +85,7 @@ task IsLocusDeleted {
boot_disk_gb: 10,
preemptible_tries: 2,
max_retries: 1,
docker: "us.gcr.io/broad-dsp-lrma/lr-mosdepth:0.3.1"
docker: "us.gcr.io/broad-dsp-lrma/lr-mosdepth:0.3.2"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know we have 0.3.2 on GCR, but given that the main branch is using 0.3.1, we can do one of two things here:

  • not updating the version here
  • find out what 0.3.2 changed from 0.3.1. I see you included several more packages in lr-mosdepth, so if you built that, then let's also bump the version in the makefile in that docker folder.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makefile version bumped.

}
RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
runtime {
Expand Down
1 change: 0 additions & 1 deletion wdl/pipelines/ONT/Preprocessing/ONTBasecall.wdl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
version 1.0

import "../../../tasks/Preprocessing/Guppy.wdl" as Guppy
import "../../../tasks/Utility/Finalize.wdl" as FF

workflow ONTBasecall {

Expand Down
1 change: 0 additions & 1 deletion wdl/pipelines/ONT/VariantCalling/ONTWholeGenome.wdl
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
version 1.0

import "../../../tasks/Utility/ONTUtils.wdl" as ONT
import "../../../tasks/Utility/Utils.wdl" as Utils
import "../../../tasks/VariantCalling/CallVariantsONT.wdl" as VAR
import "../../../tasks/Utility/Finalize.wdl" as FF
Expand Down
7 changes: 5 additions & 2 deletions wdl/pipelines/PacBio/Alignment/PBFlowcell.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@ import "../../../tasks/Preprocessing/Longbow.wdl" as Longbow
import "../../../tasks/QC/AlignedMetrics.wdl" as AM
import "../../../tasks/Visualization/NanoPlot.wdl" as NP
import "../../../tasks/Utility/Finalize.wdl" as FF

import "../../../tasks/Transcriptomics/MASSeq.wdl" as MAS

import "../../../tasks/Utility/JupyterNotebooks.wdl" as JUPYTER

# Pair of settings bundled into one value.
# NOTE(review): the workflow body that consumes this struct is not visible here;
# the field descriptions below are inferred from the names - confirm against usage.
struct DataTypeParameters {
# presumably the number of shards the input is split into - TODO confirm
Int num_shards
# presumably the aligner preset name to use - TODO confirm
String map_preset
}

workflow PBFlowcell {

Expand Down
1 change: 0 additions & 1 deletion wdl/pipelines/PacBio/Utility/PBCCSIsoSeq.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ version 1.0

import "../../../tasks/Utility/PBUtils.wdl" as PB
import "../../../tasks/Utility/Utils.wdl" as Utils
import "../../../tasks/Alignment/AlignReads.wdl" as AR
import "../../../tasks/Utility/Finalize.wdl" as FF

workflow PBCCSIsoSeq {
Expand Down
2 changes: 0 additions & 2 deletions wdl/pipelines/PacBio/Utility/PBMASIsoSeqQuantify.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@ version 1.0

import "../../../tasks/Utility/Utils.wdl" as Utils
import "../../../tasks/Utility/PBUtils.wdl" as PB
import "../../../tasks/Alignment/AlignReads.wdl" as AR
import "../../../tasks/Utility/StringTie2.wdl" as ST2
import "../../../tasks/Transcriptomics/MASSeq.wdl" as MAS
import "../../../tasks/Transcriptomics/UMI_Tools.wdl" as UMI_TOOLS
import "../../../tasks/Transcriptomics/Preprocessing_Tasks.wdl" as TX_PRE
import "../../../tasks/Transcriptomics/Postprocessing_Tasks.wdl" as TX_POST
import "../../../tasks/Utility/Finalize.wdl" as FF
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
version 1.0

import "../../../tasks/Utility/PBUtils.wdl" as PB
import "../../../tasks/Utility/Utils.wdl" as Utils
import "../../../tasks/Utility/StringTie2.wdl"
import "../../../tasks/Utility/Finalize.wdl" as FF

Expand Down
2 changes: 0 additions & 2 deletions wdl/pipelines/TechAgnostic/Utility/ConvertToHailMT.wdl
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
version 1.0

import "../../../tasks/VariantCalling/GLNexus.wdl" as GLNexus
import "../../../tasks/Utility/Hail.wdl" as Hail
import "../../../tasks/Utility/Finalize.wdl" as FF

workflow ConvertToHailMT {

Expand Down
1 change: 1 addition & 0 deletions wdl/pipelines/TechAgnostic/Utility/DownloadFromSRA.wdl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
version 1.0

import "../../../structs/Structs.wdl"
import "../../../tasks/Utility/Utils.wdl" as Utils

workflow DownloadFromSRA {
Expand Down
1 change: 1 addition & 0 deletions wdl/pipelines/TechAgnostic/Utility/DownloadFromWeb.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ version 1.0
# reimagining of the Nextflow/AWS downloading pipeline from @alaincoletta (see: http://broad.io/aws_dl).
##########################################################################################

import "../../../structs/Structs.wdl"
import "../../../tasks/Utility/Utils.wdl" as Utils

workflow DownloadFromWeb {
Expand Down
Loading