diff --git a/.github/workflows/rsem1.2.21docker-publish.yml b/.github/workflows/rsem1.2.21docker-publish.yml new file mode 100644 index 0000000..f876508 --- /dev/null +++ b/.github/workflows/rsem1.2.21docker-publish.yml @@ -0,0 +1,90 @@ +name: Docker + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +on: + #schedule: + #- cron: '37 6 * * *' + push: + branches: [ "master" ] + # Publish semver tags as releases. + #tags: [ 'v*.*.*' ] + pull_request: + branches: [ "master" ] + +env: + # Use docker.io for Docker Hub if empty + REGISTRY: ghcr.io + # github.repository as owner/repo + IMAGE_NAME: ${{ github.repository }}/tools/rsem + + +jobs: + build: + + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + # This is used to complete the identity challenge + # with sigstore/fulcio when running outside of PRs. + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Install the cosign tool except on PR + # https://github.com/sigstore/cosign-installer + # - name: Install cosign + # if: github.event_name != 'pull_request' + # uses: sigstore/cosign-installer@d6a3abf1bdea83574e28d40543793018b6035605 + # with: + # cosign-release: 'v1.7.1' + + + # Workaround: https://github.com/docker/build-push-action/issues/461 + - name: Setup Docker buildx + uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf + + # Login against a Docker registry except on PR + # https://github.com/docker/login-action + + # - name: Log into registry Docker hub + #if: github.event_name != 'pull_request' + # uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c + # with: + # username: ${{ secrets.DOCKERHUB_USERNAME }} + # password: ${{ secrets.DOCKERHUB_PASSWORD }} + + - name: Log in to the Container registry + uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c + 
with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Extract metadata (tags, labels) for Docker + # https://github.com/docker/metadata-action + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 + with: + images: + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + #docker.io/reddylab/rsem + tags: latest + + # Build and push Docker image with Buildx (NOTE: push is hard-coded to true, so PR builds push too) + # https://github.com/docker/build-push-action + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@ac9327eae2b366085ac7f6a2d02df8aa8ead720a + with: + context: ./tools/rsem/1.2.21/ + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/star-utils.yml b/.github/workflows/star-utils.yml new file mode 100644 index 0000000..dd82456 --- /dev/null +++ b/.github/workflows/star-utils.yml @@ -0,0 +1,89 @@ +name: Docker + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +on: + #schedule: + # - cron: '43 13 * * *' + push: + branches: [ "master" ] + # Publish semver tags as releases. + # tags: [ v*.*.*] + pull_request: + branches: [ "master" ] + +env: + # Use docker.io for Docker Hub if empty + REGISTRY: ghcr.io + + # github.repository as owner/repo + IMAGE_NAME: + ${{ github.repository }}/tools/star-utils + +jobs: + build: + + name: Push Docker image to multiple registries + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + # This is used to complete the identity challenge + # with sigstore/fulcio when running outside of PRs. 
+ id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Install the cosign tool except on PR + # https://github.com/sigstore/cosign-installer + #- name: Install cosign + #if: github.event_name != 'pull_request' + # uses: sigstore/cosign-installer@d6a3abf1bdea83574e28d40543793018b6035605 + # with: + # cosign-release: 'v1.7.1' + + # Workaround: https://github.com/docker/build-push-action/issues/461 + - name: Setup Docker buildx + uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf + + # Login against a Docker registry except on PR + # https://github.com/docker/login-action + # - name: Log into Docker hub + #if: github.event_name != 'pull_request' + #uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c + #with: + # username: ${{ secrets.DOCKER_USERNAME }} + # password: ${{ secrets.DOCKER_TOKEN }} + + - name: Log in to the Container registry + uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Extract metadata (tags, labels) for Docker + # https://github.com/docker/metadata-action + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 + with: + images: | + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: latest + # Build and push Docker image with Buildx (NOTE: push is hard-coded to true, so PR builds push too) + # https://github.com/docker/build-push-action + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@ac9327eae2b366085ac7f6a2d02df8aa8ead720a + with: + context: ./tools/star-utils/ + #push: ${{ github.event_name != 'pull_request' }} + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/windowtrimmer.yml b/.github/workflows/windowtrimmer.yml new file mode 100644 index 0000000..af021da --- /dev/null +++ 
b/.github/workflows/windowtrimmer.yml @@ -0,0 +1,101 @@ +name: Docker + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +on: + #schedule: + # - cron: '15 23 * * *' + push: + branches: [ "master" ] + # Publish semver tags as releases. + #tags: [ 'v*.*.*' ] + pull_request: + branches: [ "master" ] + +env: + # Use docker.io for Docker Hub if empty + REGISTRY: ghcr.io + # github.repository as owner/repo + IMAGE_NAME: ${{ github.repository }}/tools/windowtrimmer + + +jobs: + build: + + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + # This is used to complete the identity challenge + # with sigstore/fulcio when running outside of PRs. + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Install the cosign tool except on PR + # https://github.com/sigstore/cosign-installer + #- name: Install cosign + # if: github.event_name != 'pull_request' + #uses: sigstore/cosign-installer@d6a3abf1bdea83574e28d40543793018b6035605 + #with: + # cosign-release: 'v1.7.1' + + + # Workaround: https://github.com/docker/build-push-action/issues/461 + - name: Setup Docker buildx + uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf + + # Login against a Docker registry except on PR + # https://github.com/docker/login-action + # - name: Log into Docker hub + #if: github.event_name != 'pull_request' + #uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c + #with: + # username: ${{ secrets.DOCKER_USERNAME }} + # password: ${{ secrets.DOCKER_TOKEN }} + + - name: Log in to the Container registry + #if: github.event_name != 'pull_request' + uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Extract metadata (tags, labels) for 
Docker + # https://github.com/docker/metadata-action + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: latest + + # Build and push Docker image with Buildx (NOTE: push is hard-coded to true, so PR builds push too) + # https://github.com/docker/build-push-action + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@ac9327eae2b366085ac7f6a2d02df8aa8ead720a + with: + context: ./tools/windowtrimmer/ + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + # Sign the resulting Docker image digest except on PRs. + # This will only write to the public Rekor transparency log when the Docker + # repository is public to avoid leaking data. If you would like to publish + # transparency data even for private images, pass --force to cosign below. + # https://github.com/sigstore/cosign + #- name: Sign the published Docker image + # if: ${{ github.event_name != 'pull_request' }} + # env: + # COSIGN_EXPERIMENTAL: "true" + # This step uses the identity token to provision an ephemeral certificate + # against the sigstore community Fulcio instance. + # run: cosign sign ${{ steps.meta.outputs.tags }}@${{ steps.build-and-push.outputs.digest }} diff --git a/tools/rsem/1.2.21/Dockerfile b/tools/rsem/1.2.21/Dockerfile new file mode 100644 index 0000000..0be32e3 --- /dev/null +++ b/tools/rsem/1.2.21/Dockerfile @@ -0,0 +1,48 @@ +FROM phusion/baseimage:focal-1.2.0 +MAINTAINER Alejandro Barrera + +# Install dependencies +RUN apt-get update && apt-get install -y \ + build-essential \ + g++ \ + zlib1g-dev \ + libncurses-dev \ + curl \ + unzip + +######################################################### +# NOTE: Added here Bowtie image given that CWL # +# ^^^^ will not allow multiple Docker dependencies # +# at this point. 
# +######################################################### +# Installs bowtie from compiled distribution into /opt/bowtie +ENV BOWTIE_URL=http://sourceforge.net/projects/bowtie-bio/files/bowtie +ENV BOWTIE_RELEASE=0.12.9 +ENV DEST_DIR=/opt + +# Download Bowtie, unzip it and remove .zip file +RUN curl -SLo ${DEST_DIR}/bowtie-${BOWTIE_RELEASE}.zip ${BOWTIE_URL}/${BOWTIE_RELEASE}/bowtie-${BOWTIE_RELEASE}-linux-x86_64.zip/download && unzip ${DEST_DIR}/bowtie-${BOWTIE_RELEASE}.zip -d ${DEST_DIR} && rm ${DEST_DIR}/bowtie-${BOWTIE_RELEASE}.zip + +# Add bowtie path to the environment +ENV PATH=${DEST_DIR}/bowtie-${BOWTIE_RELEASE}:$PATH + +# Define ENV variables for download +ENV RSEM_RELEASE=1.2.21 +ENV RSEM_URL=https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_RELEASE}.tar.gz +#ENV RSEM_URL=https://github.com/deweylab/RSEM/releases/tag/v${RSEM_RELEASE}.tar.gz +ENV DEST_DIR=/opt + +# Download and unpack RSEM sources, build them in place with make, and remove the downloaded tarball +RUN curl -SLo ${DEST_DIR}/RSEM-${RSEM_RELEASE}.tar.gz ${RSEM_URL} && \ + tar xzvf ${DEST_DIR}/RSEM-${RSEM_RELEASE}.tar.gz -C ${DEST_DIR} && \ + cd ${DEST_DIR}/RSEM-${RSEM_RELEASE} && \ + sed -i.bak "s/CC = g++/CC = g++ -std=gnu++98/" Makefile && \ + make && \ + rm -rf ${DEST_DIR}/RSEM-${RSEM_RELEASE}.tar.gz + +# Add RSEM path to ENV path +ENV PATH=${DEST_DIR}/RSEM-${RSEM_RELEASE}/:$PATH + +# List executables provided by rsem + +CMD ["find", "/opt/", "-executable", "-type", "f", "-iname", "rsem*"] diff --git a/tools/star-utils/Dockerfile b/tools/star-utils/Dockerfile new file mode 100644 index 0000000..bd66c6e --- /dev/null +++ b/tools/star-utils/Dockerfile @@ -0,0 +1,11 @@ +FROM python:2.7 +MAINTAINER alejandro.barrera@duke.edu + +ADD src /src +WORKDIR /src + +RUN pip install -r /src/requirements.txt + +RUN chmod +x /src/create_SJ.out.tab.Pass1.conservative.sjdb.py +ENV PATH /src/:$PATH +CMD ["create_SJ.out.tab.Pass1.conservative.sjdb.py"] diff --git 
a/tools/star-utils/src/create_SJ.out.tab.Pass1.conservative.sjdb.py b/tools/star-utils/src/create_SJ.out.tab.Pass1.conservative.sjdb.py new file mode 100644 index 0000000..747d0eb --- /dev/null +++ b/tools/star-utils/src/create_SJ.out.tab.Pass1.conservative.sjdb.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +################################################## +# create_SJ.out.tab.Pass1.conservative.sjdb.py +# +# Merge the STAR 1-pass novel splice junction databases ('SJ.out.tab'). +# (Restricting output to autosomes and sex chromosomes is currently disabled.) +# Filter out splice junctions that are non-canonical or +# supported by only 1 or fewer uniquely mapping reads. +# +################################################## +import pandas as pd +from collections import defaultdict +from sys import argv + +SJ_DBs = argv[1].split(',') +SJ_DB_out = argv[2] + +# samples = [s.replace(base, '').replace(suffix, '') for s in SJ_DBs] + +# canonical_chroms = set(['chr'+str(x) for x in range(1,24)]) +# canonical_chroms.add('chrX') +# canonical_chroms.add('chrY') + +strand_dict = {} +strand_dict[0],strand_dict[1],strand_dict[2] = '.','+','-' + +# SJ.out.tab format: + +# Column 1: chromosome +# Column 2: first base of the intron (1-based) +# Column 3: last base of the intron (1-based) +# Column 4: strand +# Column 5: intron motif: 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT +# Column 6: reserved +# Column 7: number of uniquely mapping reads crossing the junction +# Column 8: reserved +# Column 9: maximum left/right overhang + +splice_dict = defaultdict(dict) +for i, SJ_DB in enumerate(SJ_DBs): + columns = ['chrom','start','end','strand','intron_motif','reserved','num_uniq_mapped_reads','reserved','left/right_overhang'] + SJ_DB = pd.read_csv(SJ_DB, delim_whitespace=True, header=None, names = columns) + # filter out non-canonical splice junctions (intron_motif == 0) + SJ_DB = SJ_DB[SJ_DB.intron_motif != 0] + # keep only splice junctions supported by more than 1 uniquely mapping read + SJ_DB = 
SJ_DB[SJ_DB.num_uniq_mapped_reads > 1] + SJ_DB['strand'] = [strand_dict[strand] for strand in SJ_DB['strand']] + for chrom, start, end, strand in zip(SJ_DB.chrom, SJ_DB.start, SJ_DB.end, SJ_DB.strand): + if (chrom,start,end,strand) in splice_dict: + splice_dict[(chrom,start,end,strand)] += 1 + else: + splice_dict[(chrom,start,end,strand)] = 1 + +splice_sites = splice_dict.keys() +splice_sites = sorted(splice_sites, key=lambda s: int(s[1])) +splice_sites = sorted(splice_sites, key=lambda s: s[0][3:]) +# (disabled chromosome filter) splice_sites = [splice_site for splice_site in splice_sites if splice_site[0] in canonical_chroms] + +splice_sites = ['\t'.join([str(x) for x in splice_site]) for splice_site in splice_sites] +out = open(SJ_DB_out,'w') +out.write('\n'.join(splice_sites) + '\n') +out.close() diff --git a/tools/star-utils/src/requirements.txt b/tools/star-utils/src/requirements.txt new file mode 100644 index 0000000..b06e5c2 --- /dev/null +++ b/tools/star-utils/src/requirements.txt @@ -0,0 +1 @@ +pandas==0.16.2 diff --git a/tools/windowtrimmer/Dockerfile b/tools/windowtrimmer/Dockerfile new file mode 100644 index 0000000..f8ec77e --- /dev/null +++ b/tools/windowtrimmer/Dockerfile @@ -0,0 +1,10 @@ +FROM python:2.7 +MAINTAINER alejandro.barrera@duke.edu + +ADD src /src +WORKDIR /src + +RUN chmod +x /src/windowTrimmer.py +ENV PATH /src/:$PATH + +CMD ["windowTrimmer.py"] diff --git a/tools/windowtrimmer/src/windowTrimmer.py b/tools/windowtrimmer/src/windowTrimmer.py new file mode 100644 index 0000000..1b1c157 --- /dev/null +++ b/tools/windowtrimmer/src/windowTrimmer.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python2.7 + +# Written by Thomas Konneker, adapted by Ian McDowell and Alejandro Barrera +# This script is intended to crawl through bed files to filter +# out reads that are too concentrated within a specified base-pair +# window. This is presumably because they are PCR artifacts. +# See Boyle et. 
# al 2011, Genome Research
# Currently this program does not 'window' in the traditional sense
# of tiling out the genome and then evaluating those
# windows. Instead starts a window at the next place there is an entry
# that follows the previous window.

# Modified 7-1-2014 to include requirement for 2 reads minimum for removal of putative artifact.
# Modified 4-25-2016 to reformat code including option to specify a BED file instead of reading stdin.

import sys
from optparse import OptionParser
from collections import Counter


class BedEntry:
    """One parsed BED record.

    The coordinates are converted to ``int``; ``fullBedLine`` keeps the
    original text of the line (without its line terminator) so the record
    can be echoed back unchanged.
    """

    def __init__(self, chromosome, start, end, name,
                 score, strand, fullBedLine):
        self.chromosome = str(chromosome)
        self.startposition = int(start)
        self.endposition = int(end)
        self.name = name
        self.score = score
        self.strand = strand
        self.fullBedLine = fullBedLine


class BedReader:
    """BED file reader object. For parsing and yielding bed entries from a file."""

    def __init__(self, inFile):
        # Any iterable of text lines works (open file, sys.stdin, list).
        self.inFile = inFile

    def spitLines(self):
        """Yield a BedEntry for every non-blank, tab-separated input line.

        Only the first three columns (chromosome, start, end) are required;
        missing name/score/strand columns are reported as ``None``.

        Raises:
            IndexError: if a non-blank line has fewer than three columns.
            ValueError: if start or end is not an integer.
        """
        for line in self.inFile:
            fullBedLine = line.strip('\n\r')
            if not fullBedLine.strip():
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            fields = fullBedLine.split('\t')
            # Pad the optional columns so BED3..BED5 input is accepted.
            name, score, strand = (fields[3:6] + [None, None, None])[:3]
            yield BedEntry(fields[0], fields[1], fields[2],
                           name, score, strand, fullBedLine)


def windowChooser(position_list, cutoff):
    """Decide whether the reads of one window should be kept.

    A window is rejected when its most frequent start position is shared by
    at least two reads AND those reads make up at least ``cutoff`` (a
    fraction) of all reads in the window.

    Returns: boolean. True to keep the window (always True for an empty
    window), False to drop it.
    """
    if not position_list:
        return True
    peak = Counter(position_list).most_common(1)[0][1]
    # float() keeps the ratio a true division under Python 2 as well.
    too_concentrated = float(peak) / len(position_list) >= cutoff
    return not (too_concentrated and peak >= 2)


def _emit_window(position_list, full_bed_list, cutoff):
    """Print the window's BED lines unless windowChooser rejects the window."""
    if windowChooser(position_list, cutoff):
        for item in full_bed_list:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(item)


def windowizer(bedIn, windowsize, cutoff):
    """Print the BED lines whose window passes the concentration cutoff.

    A window opens at the first read that starts after the previous window's
    end, or at the first read of a different chromosome, and spans
    ``windowsize`` bases from that read's start. Windows are evaluated in
    input order, so the logic relies on the input being grouped by
    chromosome and ordered by start position.
    """
    windowend = 0
    # No chromosome seen yet; the first read always opens a window.
    current_chr = None
    full_bed_list = []
    position_list = []
    for line in bedIn.spitLines():
        new_chromosome = line.chromosome != current_chr
        if new_chromosome or line.startposition > windowend:
            # Close the previous window before opening the next one.
            _emit_window(position_list, full_bed_list, cutoff)
            current_chr = line.chromosome
            windowend = line.startposition + windowsize
            position_list = [line.startposition]
            full_bed_list = [line.fullBedLine]
        else:
            position_list.append(line.startposition)
            full_bed_list.append(line.fullBedLine)
    # Flush whatever is left in the final window.
    _emit_window(position_list, full_bed_list, cutoff)


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: optional list of arguments; defaults to ``sys.argv[1:]``.
    """
    parser = OptionParser()
    parser.add_option('-w', '--windowsize', type='int',
                      dest='windowsize', default=31,
                      help='threshold for trimming reads that have '
                           'the same mapping start position')
    parser.add_option('-c', '--cutoff', type="float",
                      dest='cutoff', default=0.70,
                      help="threshold for concentration at a single base "
                           "within a window to cutoff")
    parser.add_option('-i', '--infile', type="str", dest='infile',
                      help="Input BED file to be scanned and filtered")

    (options, args) = parser.parse_args(argv)

    # Read from the named file when given, otherwise from stdin.
    if options.infile:
        infile = open(options.infile)
    else:
        infile = sys.stdin
    try:
        windowizer(BedReader(infile), options.windowsize, options.cutoff)
    finally:
        # Only close what this function opened; never close stdin.
        if infile is not sys.stdin:
            infile.close()


if __name__ == '__main__':
    main()