diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml
new file mode 100644
index 0000000..44f150a
--- /dev/null
+++ b/.github/workflows/draft-pdf.yml
@@ -0,0 +1,24 @@
+name: Draft PDF  # Builds the JOSS paper draft and uploads the PDF as a workflow artifact
+on:
+  push:
+    paths:  # Trigger only on pushes that touch the paper directory or this workflow file
+      - paper/**
+      - .github/workflows/draft-pdf.yml
+
+jobs:
+  paper:
+    runs-on: ubuntu-latest
+    name: Paper Draft
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Build draft PDF
+        uses: openjournals/openjournals-draft-action@master  # NOTE(review): follows a moving branch; consider pinning to a tag or commit SHA
+        with:
+          journal: joss
+          paper-path: paper/paper.md # Path to the paper within your repo
+      - name: Upload
+        uses: actions/upload-artifact@v4
+        with:
+          name: paper  # Name under which the artifact appears on the workflow run
+          path: paper/paper.pdf # Output path where Pandoc writes the compiled PDF
\ No newline at end of file
diff --git a/paper/paper.bib b/paper/paper.bib
new file mode 100644
index 0000000..652ea32
--- /dev/null
+++ b/paper/paper.bib
@@ -0,0 +1,432 @@
+
+@article{delahaye_sequencing_2021,
+	title = {Sequencing {DNA} with nanopores: {Troubles} and biases},
+	volume = {16},
+	issn = {1932-6203},
+	shorttitle = {Sequencing {DNA} with nanopores},
+	url = {https://dx.plos.org/10.1371/journal.pone.0257521},
+	doi = {10.1371/journal.pone.0257521},
+	abstract = {Oxford Nanopore Technologies’ (ONT) long read sequencers offer access to longer DNA fragments than previous sequencer generations, at the cost of a higher error rate. While many papers have studied read correction methods, few have addressed the detailed characterization of observed errors, a task complicated by frequent changes in chemistry and software in ONT technology. The MinION sequencer is now more stable and this paper proposes an up-to-date view of its error landscape, using the most mature flowcell and basecaller. We studied Nanopore sequencing error biases on both bacterial and human DNA reads. We found that, although Nanopore sequencing is expected not to suffer from GC bias, it is a crucial parameter with respect to errors. In particular, low-GC reads have fewer errors than high-GC reads (about 6\% and 8\% respectively). The error profile for homopolymeric regions or regions with short repeats, the source of about half of all sequencing errors, also depends on the GC rate and mainly shows deletions, although there are some reads with long insertions. Another interesting finding is that the quality measure, although over-estimated, offers valuable information to predict the error rate as well as the abundance of reads. We supplemented this study with an analysis of a rapeseed RNA read set and shown a higher level of errors with a higher level of deletion in these data. Finally, we have implemented an open source pipeline for long-term monitoring of the error profile, which enables users to easily compute various analysis presented in this work, including for future developments of the sequencing device. Overall, we hope this work will provide a basis for the design of better error-correction methods.},
+	language = {en},
+	number = {10},
+	urldate = {2025-04-08},
+	journal = {PLOS ONE},
+	author = {Delahaye, Clara and Nicolas, Jacques},
+	editor = {Andrés-León, Eduardo},
+	month = oct,
+	year = {2021},
+	pages = {e0257521},
+	annote = {An up-to-date view of Nanopore sequencing error biases on both bacterial and human DNA reads is proposed and an open source pipeline for long-term monitoring of the error profile is implemented, which enables users to easily compute various analysis presented in this work.
+
+},
+	file = {Full Text:/uni-mainz.de/homes/meesters/Zotero/storage/QBXQJMVW/Delahaye and Nicolas - 2021 - Sequencing DNA with nanopores Troubles and biases.pdf:application/pdf},
+}
+
+@article{dong_long_2021,
+	title = {The long and the short of it: unlocking nanopore long-read {RNA} sequencing data with short-read differential expression analysis tools},
+	volume = {3},
+	copyright = {http://creativecommons.org/licenses/by/4.0/},
+	issn = {2631-9268},
+	shorttitle = {The long and the short of it},
+	url = {https://academic.oup.com/nargab/article/doi/10.1093/nargab/lqab028/6253498},
+	doi = {10.1093/nargab/lqab028},
+	abstract = {Abstract
+            Application of Oxford Nanopore Technologies’ long-read sequencing platform to transcriptomic analysis is increasing in popularity. However, such analysis can be challenging due to the high sequence error and small library sizes, which decreases quantification accuracy and reduces power for statistical testing. Here, we report the analysis of two nanopore RNA-seq datasets with the goal of obtaining gene- and isoform-level differential expression information. A dataset of synthetic, spliced, spike-in RNAs (‘sequins’) as well as a mouse neural stem cell dataset from samples with a null mutation of the epigenetic regulator Smchd1 was analysed using a mix of long-read specific tools for preprocessing together with established short-read RNA-seq methods for downstream analysis. We used limma-voom to perform differential gene expression analysis, and the novel FLAMES pipeline to perform isoform identification and quantification, followed by DRIMSeq and limma-diffSplice (with stageR) to perform differential transcript usage analysis. We compared results from the sequins dataset to the ground truth, and results of the mouse dataset to a previous short-read study on equivalent samples. Overall, our work shows that transcriptomic analysis of long-read nanopore data using long-read specific preprocessing methods together with short-read differential expression methods and software that are already in wide use can yield meaningful results.},
+	language = {en},
+	number = {2},
+	urldate = {2025-04-08},
+	journal = {NAR Genomics and Bioinformatics},
+	author = {Dong, Xueyi and Tian, Luyi and Gouil, Quentin and Kariyawasam, Hasaru and Su, Shian and De Paoli-Iseppi, Ricardo and Prawer, Yair David Joseph and Clark, Michael B and Breslin, Kelsey and Iminitoff, Megan and Blewitt, Marnie E and Law, Charity W and Ritchie, Matthew E},
+	month = apr,
+	year = {2021},
+	pages = {lqab028},
+	annote = {This work shows that transcriptomic analysis of long-read nanopore data using long-read specific preprocessing methods together with short-read differential expression methods and software that are already in wide use can yield meaningful results.
+
+},
+	file = {Full Text:/uni-mainz.de/homes/meesters/Zotero/storage/76BCB677/Dong et al. - 2021 - The long and the short of it unlocking nanopore l.pdf:application/pdf},
+}
+
+@article{tang_full-length_2020-1,
+	title = {Full-length transcript characterization of {SF3B1} mutation in chronic lymphocytic leukemia reveals downregulation of retained introns},
+	volume = {11},
+	copyright = {2020 The Author(s)},
+	issn = {2041-1723},
+	url = {https://www.nature.com/articles/s41467-020-15171-6},
+	doi = {10.1038/s41467-020-15171-6},
+	abstract = {While splicing changes caused by somatic mutations in SF3B1 are known, identifying full-length isoform changes may better elucidate the functional consequences of these mutations. We report nanopore sequencing of full-length cDNA from CLL samples with and without SF3B1 mutation, as well as normal B cell samples, giving a total of 149 million pass reads. We present FLAIR (Full-Length Alternative Isoform analysis of RNA), a computational workflow to identify high-confidence transcripts, perform differential splicing event analysis, and differential isoform analysis. Using nanopore reads, we demonstrate differential 3’ splice site changes associated with SF3B1 mutation, agreeing with previous studies. We also observe a strong downregulation of intron retention events associated with SF3B1 mutation. Full-length transcript analysis links multiple alternative splicing events together and allows for better estimates of the abundance of productive versus unproductive isoforms. Our work demonstrates the potential utility of nanopore sequencing for cancer and splicing research.},
+	language = {en},
+	number = {1},
+	urldate = {2024-11-10},
+	journal = {Nature Communications},
+	author = {Tang, Alison D. and Soulette, Cameron M. and van Baren, Marijke J. and Hart, Kevyn and Hrabeta-Robinson, Eva and Wu, Catherine J. and Brooks, Angela N.},
+	month = mar,
+	year = {2020},
+	note = {Number: 1
+Publisher: Nature Publishing Group},
+	keywords = {Genome informatics, Cancer genomics, High-throughput screening, RNA splicing},
+	pages = {1438},
+}
+
+@article{altschul_basic_1990,
+	title = {Basic local alignment search tool},
+	volume = {215},
+	copyright = {https://www.elsevier.com/tdm/userlicense/1.0/},
+	issn = {00222836},
+	url = {https://linkinghub.elsevier.com/retrieve/pii/S0022283605803602},
+	doi = {10.1016/S0022-2836(05)80360-2},
+	language = {en},
+	number = {3},
+	urldate = {2025-04-08},
+	journal = {Journal of Molecular Biology},
+	author = {Altschul, Stephen F. and Gish, Warren and Miller, Webb and Myers, Eugene W. and Lipman, David J.},
+	month = oct,
+	year = {1990},
+	pages = {403--410},
+}
+
+@article{camacho_blast_2009,
+	title = {{BLAST}+: architecture and applications},
+	volume = {10},
+	issn = {1471-2105},
+	shorttitle = {{BLAST}+},
+	url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-10-421},
+	doi = {10.1186/1471-2105-10-421},
+	abstract = {Abstract
+            
+              Background
+              Sequence similarity searching is a very important bioinformatics task. While Basic Local Alignment Search Tool (BLAST) outperforms exact methods through its use of heuristics, the speed of the current BLAST software is suboptimal for very long queries or database sequences. There are also some shortcomings in the user-interface of the current command-line applications.
+            
+            
+              Results
+              We describe features and improvements of rewritten BLAST software and introduce new command-line applications. Long query sequences are broken into chunks for processing, in some cases leading to dramatically shorter run times. For long database sequences, it is possible to retrieve only the relevant parts of the sequence, reducing CPU time and memory usage for searches of short queries against databases of contigs or chromosomes. The program can now retrieve masking information for database sequences from the BLAST databases. A new modular software library can now access subject sequence data from arbitrary data sources. We introduce several new features, including strategy files that allow a user to save and reuse their favorite set of options. The strategy files can be uploaded to and downloaded from the NCBI BLAST web site.
+            
+            
+              Conclusion
+              The new BLAST command-line applications, compared to the current BLAST tools, demonstrate substantial speed improvements for long queries as well as chromosome length database sequences. We have also improved the user interface of the command-line applications.},
+	language = {en},
+	number = {1},
+	urldate = {2025-04-08},
+	journal = {BMC Bioinformatics},
+	author = {Camacho, Christiam and Coulouris, George and Avagyan, Vahram and Ma, Ning and Papadopoulos, Jason and Bealer, Kevin and Madden, Thomas L},
+	month = dec,
+	year = {2009},
+	pages = {421},
+	file = {Full Text:/uni-mainz.de/homes/meesters/Zotero/storage/HUHRNLUM/Camacho et al. - 2009 - BLAST+ architecture and applications.pdf:application/pdf},
+}
+
+@ARTICLE{Hauswedell2024-ph,
+  title    = "Lambda3: homology search for protein, nucleotide, and
+              bisulfite-converted sequences",
+  author   = "Hauswedell, Hannes and Hetzel, Sara and Gottlieb, Simon G and
+              Kretzmer, Helene and Meissner, Alexander and Reinert, Knut",
+  abstract = "MOTIVATION: Local alignments of query sequences in large
+              databases represent a core part of metagenomic studies and
+              facilitate homology search. Following the development of NCBI
+              Blast, many applications aimed to provide faster and equally
+              sensitive local alignment frameworks. Most applications focus on
+              protein alignments, while only few also facilitate DNA-based
+              searches. None of the established programs allow searching DNA
+              sequences from bisulfite sequencing experiments commonly used for
+              DNA methylation profiling, for which specific alignment
+              strategies need to be implemented. RESULTS: Here, we introduce
+              Lambda3, a new version of the local alignment application Lambda.
+              Lambda3 is the first solution that enables the search of protein,
+              nucleotide as well as bisulfite-converted nucleotide query
+              sequences. Its protein mode achieves comparable performance to
+              that of the highly optimized protein alignment application
+              Diamond, while the nucleotide mode consistently outperforms
+              established local nucleotide aligners. Combined, Lambda3 presents
+              a universal local alignment framework that enables fast and
+              sensitive homology searches for a wide range of use-cases.
+              AVAILABILITY AND IMPLEMENTATION: Lambda3 is free and open-source
+              software publicly available at https://github.com/seqan/lambda/.",
+  journal  = "Bioinformatics",
+  volume   =  40,
+  number   =  3,
+  month    =  mar,
+  year     =  2024,
+  language = "en"
+}
+
+@article{molder_sustainable_2021,
+	title = {Sustainable data analysis with {Snakemake}},
+	volume = {10},
+	issn = {2046-1402},
+	url = {https://f1000research.com/articles/10-33/v2},
+	doi = {10.12688/f1000research.29032.2},
+	abstract = {Data analysis often entails a multitude of heterogeneous steps, from the application of various command line tools to the usage of scripting languages like R or Python for the generation of plots and tables. It is widely recognized that data analyses should ideally be conducted in a reproducible way. Reproducibility enables technical validation and regeneration of results on the original or even new data. However, reproducibility alone is by no means sufficient to deliver an analysis that is of lasting impact (i.e., sustainable) for the field, or even just one research group. We postulate that it is equally important to ensure adaptability and transparency. The former describes the ability to modify the analysis to answer extended or slightly different research questions. The latter describes the ability to understand the analysis in order to judge whether it is not only technically, but methodologically valid. Here, we analyze the properties needed for a data analysis to become reproducible, adaptable, and transparent. We show how the popular workflow management system Snakemake can be used to guarantee this, and how it enables an ergonomic, combined, unified representation of all steps involved in data analysis, ranging from raw data processing, to quality control and fine-grained, interactive exploration and plotting of final results.},
+	language = {en},
+	urldate = {2024-05-07},
+	journal = {F1000Research},
+	author = {Mölder, Felix and Jablonski, Kim Philipp and Letcher, Brice and Hall, Michael B. and Tomkins-Tinch, Christopher H. and Sochat, Vanessa and Forster, Jan and Lee, Soohyun and Twardziok, Sven O. and Kanitz, Alexander and Wilm, Andreas and Holtgrewe, Manuel and Rahmann, Sven and Nahnsen, Sven and Köster, Johannes},
+	month = apr,
+	year = {2021},
+	pages = {33},
+}
+
+@article{oleary_exploring_2024,
+	title = {Exploring and retrieving sequence and metadata for species across the tree of life with {NCBI} {Datasets}},
+	volume = {11},
+	issn = {2052-4463},
+	url = {https://www.nature.com/articles/s41597-024-03571-y},
+	doi = {10.1038/s41597-024-03571-y},
+	abstract = {Abstract
+            To explore complex biological questions, it is often necessary to access various data types from public data repositories. As the volume and complexity of biological sequence data grow, public repositories face significant challenges in ensuring that the data is easily discoverable and usable by the biological research community. To address these challenges, the National Center for Biotechnology Information (NCBI) has created NCBI Datasets. This resource provides straightforward, comprehensive, and scalable access to biological sequences, annotations, and metadata for a wide range of taxa. Following the FAIR (Findable, Accessible, Interoperable, and Reusable) data management principles, NCBI Datasets offers user-friendly web interfaces, command-line tools, and documented APIs, empowering researchers to access NCBI data seamlessly. The data is delivered as packages of sequences and metadata, thus facilitating improved data retrieval, sharing, and usability in research. Moreover, this data delivery method fosters effective data attribution and promotes its further reuse. This paper outlines the current scope of data accessible through NCBI Datasets and explains various options for exploring and downloading the data.},
+	language = {en},
+	number = {1},
+	urldate = {2025-04-08},
+	journal = {Scientific Data},
+	author = {O’Leary, Nuala A. and Cox, Eric and Holmes, J. Bradley and Anderson, W. Ray and Falk, Robert and Hem, Vichet and Tsuchiya, Mirian T. N. and Schuler, Gregory D. and Zhang, Xuan and Torcivia, John and Ketter, Anne and Breen, Laurie and Cothran, Jonathan and Bajwa, Hena and Tinne, Jovany and Meric, Peter A. and Hlavina, Wratko and Schneider, Valerie A.},
+	month = jul,
+	year = {2024},
+	pages = {732},
+	file = {Full Text:/uni-mainz.de/homes/meesters/Zotero/storage/4FFX2589/O’Leary et al. - 2024 - Exploring and retrieving sequence and metadata for.pdf:application/pdf},
+}
+
+@article{cock_biopython_2009,
+	title = {Biopython: freely available {Python} tools for computational molecular biology and bioinformatics},
+	volume = {25},
+	issn = {1367-4803},
+	shorttitle = {Biopython},
+	url = {https://doi.org/10.1093/bioinformatics/btp163},
+	doi = {10.1093/bioinformatics/btp163},
+	abstract = {Summary: The Biopython project is a mature open source international collaboration of volunteer developers, providing Python libraries for a wide range of bioinformatics problems. Biopython includes modules for reading and writing different sequence file formats and multiple sequence alignments, dealing with 3D macro molecular structures, interacting with common tools such as BLAST, ClustalW and EMBOSS, accessing key online databases, as well as providing numerical methods for statistical learning. Availability: Biopython is freely available, with documentation and source code at www.biopython.org under the Biopython license. Contact: All queries should be directed to the Biopython mailing lists, see www.biopython.org/wiki/\_Mailing\_lists peter.cock@scri.ac.uk.},
+	number = {11},
+	urldate = {2024-11-12},
+	journal = {Bioinformatics},
+	author = {Cock, Peter J. A. and Antao, Tiago and Chang, Jeffrey T. and Chapman, Brad A. and Cox, Cymon J. and Dalke, Andrew and Friedberg, Iddo and Hamelryck, Thomas and Kauff, Frank and Wilczynski, Bartek and de Hoon, Michiel J. L.},
+	month = jun,
+	year = {2009},
+	note = {Number: 11},
+	pages = {1422--1423},
+}
+
+@article{de_coster_nanopack_2018,
+	title = {{NanoPack}: visualizing and processing long-read sequencing data},
+	volume = {34},
+	copyright = {http://creativecommons.org/licenses/by/4.0/},
+	issn = {1367-4803, 1367-4811},
+	shorttitle = {{NanoPack}},
+	url = {https://academic.oup.com/bioinformatics/article/34/15/2666/4934939},
+	doi = {10.1093/bioinformatics/bty149},
+	abstract = {Abstract
+            
+              Summary
+              Here we describe NanoPack, a set of tools developed for visualization and processing of long-read sequencing data from Oxford Nanopore Technologies and Pacific Biosciences.
+            
+            
+              Availability and implementation
+              The NanoPack tools are written in Python3 and released under the GNU GPL3.0 License. The source code can be found at https://github.com/wdecoster/nanopack, together with links to separate scripts and their documentation. The scripts are compatible with Linux, Mac OS and the MS Windows 10 subsystem for Linux and are available as a graphical user interface, a web service at http://nanoplot.bioinf.be and command line tools.
+            
+            
+              Supplementary information
+              Supplementary data are available at Bioinformatics online.},
+	language = {en},
+	number = {15},
+	urldate = {2025-04-08},
+	journal = {Bioinformatics},
+	author = {De Coster, Wouter and D’Hert, Svenn and Schultz, Darrin T and Cruts, Marc and Van Broeckhoven, Christine},
+	editor = {Berger, Bonnie},
+	month = aug,
+	year = {2018},
+	pages = {2666--2669},
+	file = {Full Text:/uni-mainz.de/homes/meesters/Zotero/storage/ZWVHB4RC/De Coster et al. - 2018 - NanoPack visualizing and processing long-read seq.pdf:application/pdf},
+}
+
+@article{li_minimap2_2018,
+	title = {Minimap2: pairwise alignment for nucleotide sequences},
+	volume = {34},
+	copyright = {https://academic.oup.com/journals/pages/open\_access/funder\_policies/chorus/standard\_publication\_model},
+	issn = {1367-4803, 1367-4811},
+	shorttitle = {Minimap2},
+	url = {https://academic.oup.com/bioinformatics/article/34/18/3094/4994778},
+	doi = {10.1093/bioinformatics/bty191},
+	abstract = {Abstract
+            
+              Motivation
+              Recent advances in sequencing technologies promise ultra-long reads of ∼100 kb in average, full-length mRNA or cDNA reads in high throughput and genomic contigs over 100 Mb in length. Existing alignment programs are unable or inefficient to process such data at scale, which presses for the development of new alignment algorithms.
+            
+            
+              Results
+              Minimap2 is a general-purpose alignment program to map DNA or long mRNA sequences against a large reference database. It works with accurate short reads of ≥100 bp in length, ≥1 kb genomic reads at error rate ∼15\%, full-length noisy Direct RNA or cDNA reads and assembly contigs or closely related full chromosomes of hundreds of megabases in length. Minimap2 does split-read alignment, employs concave gap cost for long insertions and deletions and introduces new heuristics to reduce spurious alignments. It is 3–4 times as fast as mainstream short-read mappers at comparable accuracy, and is ≥30 times faster than long-read genomic or cDNA mappers at higher accuracy, surpassing most aligners specialized in one type of alignment.
+            
+            
+              Availability and implementation
+              https://github.com/lh3/minimap2
+            
+            
+              Supplementary information
+              Supplementary data are available at Bioinformatics online.},
+	language = {en},
+	number = {18},
+	urldate = {2025-04-08},
+	journal = {Bioinformatics},
+	author = {Li, Heng},
+	editor = {Birol, Inanc},
+	month = sep,
+	year = {2018},
+	pages = {3094--3100},
+	file = {Full Text:/uni-mainz.de/homes/meesters/Zotero/storage/VAAMZVYK/Li - 2018 - Minimap2 pairwise alignment for nucleotide sequen.pdf:application/pdf},
+}
+
+@article{zhu_heavy-tailed_2019,
+	title = {Heavy-tailed prior distributions for sequence count data: removing the noise and preserving large differences},
+	volume = {35},
+	copyright = {http://creativecommons.org/licenses/by-nc/4.0/},
+	issn = {1367-4803, 1367-4811},
+	shorttitle = {Heavy-tailed prior distributions for sequence count data},
+	url = {https://academic.oup.com/bioinformatics/article/35/12/2084/5159452},
+	doi = {10.1093/bioinformatics/bty895},
+	abstract = {Abstract
+            
+              Motivation
+              In RNA-seq differential expression analysis, investigators aim to detect those genes with changes in expression level across conditions, despite technical and biological variability in the observations. A common task is to accurately estimate the effect size, often in terms of a logarithmic fold change (LFC).
+            
+            
+              Results
+              When the read counts are low or highly variable, the maximum likelihood estimates for the LFCs has high variance, leading to large estimates not representative of true differences, and poor ranking of genes by effect size. One approach is to introduce filtering thresholds and pseudocounts to exclude or moderate estimated LFCs. Filtering may result in a loss of genes from the analysis with true differences in expression, while pseudocounts provide a limited solution that must be adapted per dataset. Here, we propose the use of a heavy-tailed Cauchy prior distribution for effect sizes, which avoids the use of filter thresholds or pseudocounts. The proposed method, Approximate Posterior Estimation for generalized linear model, apeglm, has lower bias than previously proposed shrinkage estimators, while still reducing variance for those genes with little information for statistical inference.
+            
+            
+              Availability and implementation
+              The apeglm package is available as an R/Bioconductor package at https://bioconductor.org/packages/apeglm, and the methods can be called from within the DESeq2 software.
+            
+            
+              Supplementary information
+              Supplementary data are available at Bioinformatics online.},
+	language = {en},
+	number = {12},
+	urldate = {2025-04-08},
+	journal = {Bioinformatics},
+	author = {Zhu, Anqi and Ibrahim, Joseph G and Love, Michael I},
+	editor = {Stegle, Oliver},
+	month = jun,
+	year = {2019},
+	pages = {2084--2092},
+	file = {Full Text:/uni-mainz.de/homes/meesters/Zotero/storage/Y3R2C7BG/Zhu et al. - 2019 - Heavy-tailed prior distributions for sequence coun.pdf:application/pdf},
+}
+
+@article{love_moderated_2014,
+	title = {Moderated estimation of fold change and dispersion for {RNA}-seq data with {DESeq2}},
+	volume = {15},
+	issn = {1474-760X},
+	url = {https://genomebiology.biomedcentral.com/articles/10.1186/s13059-014-0550-8},
+	doi = {10.1186/s13059-014-0550-8},
+	abstract = {Abstract
+            
+              In comparative high-throughput sequencing assays, a fundamental task is the analysis of count data, such as read counts per gene in RNA-seq, for evidence of systematic changes across experimental conditions. Small replicate numbers, discreteness, large dynamic range and the presence of outliers require a suitable statistical approach. We present
+              DESeq2
+              , a method for differential analysis of count data, using shrinkage estimation for dispersions and fold changes to improve stability and interpretability of estimates. This enables a more quantitative analysis focused on the strength rather than the mere presence of differential expression. The
+              DESeq2
+              package is available at
+              http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html
+              .},
+	language = {en},
+	number = {12},
+	urldate = {2025-04-08},
+	journal = {Genome Biology},
+	author = {Love, Michael I and Huber, Wolfgang and Anders, Simon},
+	month = dec,
+	year = {2014},
+	pages = {550},
+	file = {Full Text:/uni-mainz.de/homes/meesters/Zotero/storage/KM87QE3P/Love et al. - 2014 - Moderated estimation of fold change and dispersion.pdf:application/pdf},
+}
+
+@ARTICLE{Chen2025-ev,
+  title     = "A systematic benchmark of Nanopore long-read {RNA} sequencing
+               for transcript-level analysis in human cell lines",
+  author    = "Chen, Ying and Davidson, Nadia M and Wan, Yuk Kei and Yao, Fei
+               and Su, Yan and Gamaarachchi, Hasindu and Sim, Andre and Patel,
+               Harshil and Low, Hwee Meng and Hendra, Christopher and Wratten,
+               Laura and Hakkaart, Christopher and Sawyer, Chelsea and
+               Iakovleva, Viktoriia and Lee, Puay Leng and Xin, Lixia and Ng,
+               Hui En Vanessa and Loo, Jia Min and Ong, Xuewen and Ng, Hui Qi
+               Amanda and Wang, Jiaxu and Koh, Wei Qian Casslynn and Poon, Suk
+               Yeah Polly and Stanojevic, Dominik and Tran, Hoang-Dai and Lim,
+               Kok Hao Edwin and Toh, Shen Yon and Ewels, Philip Andrew and Ng,
+               Huck-Hui and Iyer, N Gopalakrishna and Thiery, Alexandre and
+               Chng, Wee Joo and Chen, Leilei and DasGupta, Ramanuj and Sikic,
+               Mile and Chan, Yun-Shen and Tan, Boon Ooi Patrick and Wan, Yue
+               and Tam, Wai Leong and Yu, Qiang and Khor, Chiea Chuan and
+               W{\"u}stefeld, Torsten and Lezhava, Alexander and Pratanwanich,
+               Ploy N and Love, Michael I and Goh, Wee Siong Sho and Ng, Sarah
+               B and Oshlack, Alicia and {SG-NEx consortium} and G{\"o}ke,
+               Jonathan",
+  abstract  = "The human genome contains instructions to transcribe more than
+               200,000 RNAs. However, many RNA transcripts are generated from
+               the same gene, resulting in alternative isoforms that are highly
+               similar and that remain difficult to quantify. To evaluate the
+               ability to study RNA transcript expression, we profiled seven
+               human cell lines with five different RNA-sequencing protocols,
+               including short-read cDNA, Nanopore long-read direct RNA,
+               amplification-free direct cDNA and PCR-amplified cDNA
+               sequencing, and PacBio IsoSeq, with multiple spike-in controls,
+               and additional transcriptome-wide N6-methyladenosine profiling
+               data. We describe differences in read length, coverage,
+               throughput and transcript expression, reporting that long-read
+               RNA sequencing more robustly identifies major isoforms. We
+               illustrate the value of the SG-NEx data to identify alternative
+               isoforms, novel transcripts, fusion transcripts and
+               N6-methyladenosine RNA modifications. Together, the SG-NEx data
+               provide a comprehensive resource enabling the development and
+               benchmarking of computational methods for profiling complex
+               transcriptional events at isoform-level resolution.",
+  journal   = "Nat. Methods",
+  publisher = "Springer Science and Business Media LLC",
+  volume    =  22,
+  number    =  4,
+  pages     = "801--812",
+  month     =  apr,
+  year      =  2025,
+  copyright = "https://creativecommons.org/licenses/by/4.0",
+  language  = "en"
+}
+
+@ARTICLE{Chen2025-yk,
+  title     = "A systematic benchmark of Nanopore long-read {RNA} sequencing
+               for transcript-level analysis in human cell lines",
+  author    = "Chen, Ying and Davidson, Nadia M and Wan, Yuk Kei and Yao, Fei
+               and Su, Yan and Gamaarachchi, Hasindu and Sim, Andre and Patel,
+               Harshil and Low, Hwee Meng and Hendra, Christopher and Wratten,
+               Laura and Hakkaart, Christopher and Sawyer, Chelsea and
+               Iakovleva, Viktoriia and Lee, Puay Leng and Xin, Lixia and Ng,
+               Hui En Vanessa and Loo, Jia Min and Ong, Xuewen and Ng, Hui Qi
+               Amanda and Wang, Jiaxu and Koh, Wei Qian Casslynn and Poon, Suk
+               Yeah Polly and Stanojevic, Dominik and Tran, Hoang-Dai and Lim,
+               Kok Hao Edwin and Toh, Shen Yon and Ewels, Philip Andrew and Ng,
+               Huck-Hui and Iyer, N Gopalakrishna and Thiery, Alexandre and
+               Chng, Wee Joo and Chen, Leilei and DasGupta, Ramanuj and Sikic,
+               Mile and Chan, Yun-Shen and Tan, Boon Ooi Patrick and Wan, Yue
+               and Tam, Wai Leong and Yu, Qiang and Khor, Chiea Chuan and
+               W{\"u}stefeld, Torsten and Lezhava, Alexander and Pratanwanich,
+               Ploy N and Love, Michael I and Goh, Wee Siong Sho and Ng, Sarah
+               B and Oshlack, Alicia and {SG-NEx consortium} and G{\"o}ke,
+               Jonathan",
+  abstract  = "The human genome contains instructions to transcribe more than
+               200,000 RNAs. However, many RNA transcripts are generated from
+               the same gene, resulting in alternative isoforms that are highly
+               similar and that remain difficult to quantify. To evaluate the
+               ability to study RNA transcript expression, we profiled seven
+               human cell lines with five different RNA-sequencing protocols,
+               including short-read cDNA, Nanopore long-read direct RNA,
+               amplification-free direct cDNA and PCR-amplified cDNA
+               sequencing, and PacBio IsoSeq, with multiple spike-in controls,
+               and additional transcriptome-wide N6-methyladenosine profiling
+               data. We describe differences in read length, coverage,
+               throughput and transcript expression, reporting that long-read
+               RNA sequencing more robustly identifies major isoforms. We
+               illustrate the value of the SG-NEx data to identify alternative
+               isoforms, novel transcripts, fusion transcripts and
+               N6-methyladenosine RNA modifications. Together, the SG-NEx data
+               provide a comprehensive resource enabling the development and
+               benchmarking of computational methods for profiling complex
+               transcriptional events at isoform-level resolution.",
+  journal   = "Nat. Methods",
+  publisher = "Springer Science and Business Media LLC",
+  volume    =  22,
+  number    =  4,
+  pages     = "801--812",
+  month     =  apr,
+  year      =  2025,
+  copyright = "https://creativecommons.org/licenses/by/4.0",
+  language  = "en"
+}
\ No newline at end of file
diff --git a/paper/paper.md b/paper/paper.md
new file mode 100644
index 0000000..38ca5da
--- /dev/null
+++ b/paper/paper.md
@@ -0,0 +1,136 @@
+---
+title: 'A Snakemake workflow for differential expression analysis with alternative splicing detection using long-read data'
+tags:
+  - Snakemake
+  - Nanopore
+  - HPC
+  - differential gene expression
+  - alternative splicing detection
+authors:
+  - name: Yannic Eising
+    orcid: 0009-0003-9103-5689
+    affiliation: [1, 2]
+  - name: Sören Lukas Hellmann
+    orcid: 0000-0003-4958-1419
+    affiliation: 1
+  - name: Christiane Krämer
+    orcid: 0009-0000-6220-076X
+    affiliation: 1
+  - name: Christian Meesters
+    corresponding: true
+    orcid: 0000-0003-2408-7588
+    affiliation: 2
+affiliations:
+  - name: Nucleic Acids Core Facility, Johannes Gutenberg-University Mainz, Germany
+    index: 1
+  - name: NHR-SouthWest / High Performance Computing Group, Johannes Gutenberg-University Mainz, Germany
+    index: 2
+   
+date: 04 April 2025  # TODO: update before submission
+bibliography: paper.bib
+
+---
+
+# Summary
+
+Long-read RNA sequencing technologies enable the characterization of full-length transcripts and complex splicing patterns. While offering new opportunities for transcriptomic analysis, these data come with substantial computational demands, especially when scaling to multiple samples, replicates, and experimental conditions.
+
+We present a modular, reproducible workflow tailored for differential expression and alternative splicing analysis from long-read RNA sequencing data.
+The workflow is designed for use on high-performance computing (HPC) clusters or cloud systems, enabling efficient parallel execution of computationally intensive steps such as read alignment, quantification, and isoform detection.
+
+It supports quality filtering, statistical analysis of gene expression across conditions, and isoform-level splicing analysis. For ill-annotated or novel genomes, it includes an optional annotation step based on local similarity searches to assign putative gene functions.
+Reference data can be supplied via local files or retrieved automatically using NCBI accession numbers.
+
+It is well-suited for researchers working with large datasets and complex experimental designs who require transparent, reproducible, and HPC-compatible analysis workflows.
+
+# Statement of Need
+
+Long-read sequencing technologies, such as Oxford Nanopore Technologies (ONT), have revolutionized transcriptomic studies by enabling direct detection of full-length RNA molecules [@delahaye_sequencing_2021].
+This advancement facilitates more accurate analyses of differential gene expression [@dong_long_2021] and alternative splicing events, both of which are essential for understanding transcriptomic complexity and functional genomics.
+However, analyzing long-read transcriptomic data remains technically challenging due to the intricacies of read preprocessing, isoform-level quantification, and the need for reproducible and scalable computational workflows.
+
+Several existing tools, such as FLAIR [@tang_full-length_2020-1], provide frameworks for isoform-level analysis of long-read transcriptomic data.
+While these tools offer powerful features, they often rely on manual configuration, may not fully support reproducible execution across computing environments, and frequently lack integration with high-performance computing (HPC) infrastructure.
+
+To address these gaps, we present a Snakemake-based workflow that automates the analysis of Nanopore long-read sequencing data with a focus on differential gene expression and alternative splicing detection.
+While other workflows exist that support either differential expression analysis or isoform-level analysis, ours integrates both in a single modular and reproducible pipeline designed for scalability across local machines, HPC clusters, and cloud environments.
+
+A distinctive feature of our workflow is its capability to operate on ill-annotated or completely unannotated genomes.
+To support these cases, the workflow includes optional local alignments using tools such as BLAST [@altschul_basic_1990;@camacho_blast_2009] or lambda [@Hauswedell2024-ph], enabling the functional annotation of transcripts by identifying putative gene functions.
+This enhances interpretability in non-model organisms and supports exploratory analyses in less-characterized transcriptomes.
+
+By leveraging Snakemake’s robust workflow management capabilities [@molder_sustainable_2021], our workflow offers transparent provenance tracking, efficient resource handling, and reproducible execution.
+It provides a flexible foundation for advanced long-read transcriptomic analyses and fills a critical gap in the ecosystem of accessible, reproducible, and extensible workflows for Nanopore RNA sequencing data.
+
+# Implementation
+
+## Input Data and Reference Handling
+
+The workflow accepts raw ONT reads in FASTQ format, along with either user-specified or automatically downloaded reference data. Reference transcriptomes and genome assemblies can be provided as file paths, or alternatively, specified using NCBI accession numbers, in which case the required data are retrieved using `ncbi-datasets` [@oleary_exploring_2024].
+This allows users to flexibly apply the workflow to well-characterized model organisms or newly sequenced, poorly annotated species.
+
+## Quality Filtering and Assessment
+
+Prior to downstream analysis, reads undergo a configurable quality control step. Users can specify a read length threshold. For this, we use the BioPython library [@cock_biopython_2009]. To ensure sufficient quality, we rely on the ONT basecaller for filtering out low-quality reads. Sample quality statistics and read length distributions are assessed using NanoPlot [@de_coster_nanopack_2018], which generates interactive and publication-ready QC plots. These are included in the workflow report and ensure high-confidence input for downstream expression and splicing analysis.
+
+## Transcriptome Alignment and Differential Expression Analysis
+
+Reads passing quality filters are aligned to the reference transcriptome with `minimap2` [@li_minimap2_2018]. Following alignment, read counts per transcript are computed and used for differential expression analysis using PyDESeq2 [@zhu_heavy-tailed_2019;@love_moderated_2014], a Python-native implementation of the DESeq2 method.
+
+This enables statistical analysis of gene expression changes across experimental conditions while staying within a Python-based workflow ecosystem.
+
+## Alternative Splicing Analysis
+
+For isoform-level analysis, the workflow integrates the FLAIR toolkit [@tang_full-length_2020-1]. We adapted the FLAIR plotting script to improve Snakemake compatibility and enable automated per-gene isoform visualization. Isoforms are collapsed, quantified, and categorized to identify splicing patterns and events across conditions.
+
+## Optional Functional Annotation via Local Alignment
+
+When reference data are incomplete, unannotated, or of uncertain quality, the workflow offers optional functional annotation. Transcripts or isoforms can be locally aligned against curated UniRef protein databases using BLAST or lambda. This provides putative gene product functions that support biological interpretation in non-model organisms or exploratory studies.
+
+# Workflow at a Glance
+
+![The Directed Acyclic Graph (DAG) of the complete long-read RNA-Seq workflow. The graph was generated by the Snakemake command: 'snakemake --rulegraph | dot -Tsvg > rulegraph.svg'.](rulegraph.svg)
+
+TODO: Annotate the workflow with boxes per feature set. Requires the latest updates and cleanups to be incorporated.
+
+## Example Dataset
+
+A complete example run using six cDNA Nanopore sequencing datasets from human H9 and HCT 116 cell lines is provided below. The data originate from the SG-NEx project [@Chen2025-yk] and were accessed on 31 March 2025 from [registry.opendata.aws/sg-nex-data](https://registry.opendata.aws/sg-nex-data). This example illustrates the workflow's capabilities in isoform analysis and functional annotation.
+
+[]() # Add report HTML
+
+# Usage
+
+## Configuration
+The workflow uses three configuration files to enable its full functionality:
+
+1. **`samples.csv`**
+   Contains sample metadata, including sample IDs and experimental conditions.
+2. **`config/config.yaml`**
+   Defines general workflow behavior, rule-specific options, and paths to input data.
+3. **`profile/config.yaml`**
+   Stores cluster-specific variables, enabling resource allocation settings tailored to your computing environment.
+
+## Running the workflow
+The workflow can be started by executing the following Snakemake command:
+
+```
+# -j unlimited          run Snakemake with no job limit
+# --workflow-profile    use a specific Snakemake profile
+# --configfile          specify the main config file
+# --directory           set the working directory
+# --sdm conda           enable conda software deployment
+# --conda-prefix        custom prefix for conda environments
+snakemake -j unlimited \
+  --workflow-profile <profile_dir> \
+  --configfile ../config/<config_file> \
+  --directory <results_dir> \
+  --sdm conda \
+  --conda-prefix <conda_envs_dir>
+```
+
+Here, we assume the workflow directory as the current working directory. When working from a different directory, specifying the workflow with `--snakefile <path to Snakefile>` is necessary.
+
+The `profile_directory` contains a template configuration for the cluster "Mogon NHR" in Mainz, Germany. We encourage users to contribute their profiles to the repository.
+
+For detailed information about these and additional options, see the [Snakemake command-line options](https://snakemake.readthedocs.io/en/stable/executing/cli.html#all-options).
+
+# Acknowledgements
+
+Any?
+
+# References
diff --git a/paper/rulegraph.svg b/paper/rulegraph.svg
new file mode 100644
index 0000000..2e8c064
--- /dev/null
+++ b/paper/rulegraph.svg
@@ -0,0 +1,511 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.40.1 (0)
+ -->
+<!-- Title: snakemake_dag Pages: 1 -->
+<svg width="1056pt" height="908pt"
+ viewBox="0.00 0.00 1055.50 908.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 904)">
+<title>snakemake_dag</title>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-904 1051.5,-904 1051.5,4 -4,4"/>
+<!-- 0 -->
+<g id="node1" class="node">
+<title>0</title>
+<path fill="none" stroke="#d87556" stroke-width="2" d="M511.5,-36C511.5,-36 481.5,-36 481.5,-36 475.5,-36 469.5,-30 469.5,-24 469.5,-24 469.5,-12 469.5,-12 469.5,-6 475.5,0 481.5,0 481.5,0 511.5,0 511.5,0 517.5,0 523.5,-6 523.5,-12 523.5,-12 523.5,-24 523.5,-24 523.5,-30 517.5,-36 511.5,-36"/>
+<text text-anchor="middle" x="496.5" y="-15.5" font-family="sans" font-size="10.00" fill="#000000">all</text>
+</g>
+<!-- 1 -->
+<g id="node2" class="node">
+<title>1</title>
+<path fill="none" stroke="#afd856" stroke-width="2" d="M79,-108C79,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 79,-72 79,-72 85,-72 91,-78 91,-84 91,-84 91,-96 91,-96 91,-102 85,-108 79,-108"/>
+<text text-anchor="middle" x="45.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">dump_versions</text>
+</g>
+<!-- 1&#45;&gt;0 -->
+<g id="edge5" class="edge">
+<title>1&#45;&gt;0</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M91.2414,-74.3446C94.3578,-73.4903 97.4646,-72.6976 100.5,-72 230.4291,-42.1384 388.3931,-26.6828 459.1205,-20.8299"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="459.6232,-24.3007 469.3075,-20.0038 459.0573,-17.3236 459.6232,-24.3007"/>
+</g>
+<!-- 2 -->
+<g id="node3" class="node">
+<title>2</title>
+<path fill="none" stroke="#5682d8" stroke-width="2" d="M191.5,-108C191.5,-108 121.5,-108 121.5,-108 115.5,-108 109.5,-102 109.5,-96 109.5,-96 109.5,-84 109.5,-84 109.5,-78 115.5,-72 121.5,-72 121.5,-72 191.5,-72 191.5,-72 197.5,-72 203.5,-78 203.5,-84 203.5,-84 203.5,-96 203.5,-96 203.5,-102 197.5,-108 191.5,-108"/>
+<text text-anchor="middle" x="156.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">sample_qa_plot</text>
+</g>
+<!-- 2&#45;&gt;0 -->
+<g id="edge7" class="edge">
+<title>2&#45;&gt;0</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M203.7375,-74.3652C206.6919,-73.5276 209.6301,-72.7311 212.5,-72 299.9979,-49.7096 404.7741,-32.0799 459.3318,-23.5687"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="459.8884,-27.0243 469.2365,-22.0379 458.8192,-20.1064 459.8884,-27.0243"/>
+</g>
+<!-- 3 -->
+<g id="node4" class="node">
+<title>3</title>
+<path fill="none" stroke="#5663d8" stroke-width="2" d="M331.5,-108C331.5,-108 233.5,-108 233.5,-108 227.5,-108 221.5,-102 221.5,-96 221.5,-96 221.5,-84 221.5,-84 221.5,-78 227.5,-72 233.5,-72 233.5,-72 331.5,-72 331.5,-72 337.5,-72 343.5,-78 343.5,-84 343.5,-84 343.5,-96 343.5,-96 343.5,-102 337.5,-108 331.5,-108"/>
+<text text-anchor="middle" x="282.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">total_sample_qa_plot</text>
+</g>
+<!-- 3&#45;&gt;0 -->
+<g id="edge3" class="edge">
+<title>3&#45;&gt;0</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M336.225,-71.9243C374.7639,-58.9579 425.6822,-41.8265 459.6833,-30.3869"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="460.8677,-33.6813 469.2295,-27.1751 458.6355,-27.0467 460.8677,-33.6813"/>
+</g>
+<!-- 4 -->
+<g id="node5" class="node">
+<title>4</title>
+<path fill="none" stroke="#d89c56" stroke-width="2" d="M629.5,-180C629.5,-180 585.5,-180 585.5,-180 579.5,-180 573.5,-174 573.5,-168 573.5,-168 573.5,-156 573.5,-156 573.5,-150 579.5,-144 585.5,-144 585.5,-144 629.5,-144 629.5,-144 635.5,-144 641.5,-150 641.5,-156 641.5,-156 641.5,-168 641.5,-168 641.5,-174 635.5,-180 629.5,-180"/>
+<text text-anchor="middle" x="607.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">bam_stats</text>
+</g>
+<!-- 4&#45;&gt;0 -->
+<g id="edge8" class="edge">
+<title>4&#45;&gt;0</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M593.4418,-143.7623C574.0087,-118.5518 538.8267,-72.9103 516.615,-44.0952"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="519.3212,-41.8729 510.4441,-36.0896 513.7772,-46.1465 519.3212,-41.8729"/>
+</g>
+<!-- 5 -->
+<g id="node6" class="node">
+<title>5</title>
+<path fill="none" stroke="#5692d8" stroke-width="2" d="M619.5,-540C619.5,-540 563.5,-540 563.5,-540 557.5,-540 551.5,-534 551.5,-528 551.5,-528 551.5,-516 551.5,-516 551.5,-510 557.5,-504 563.5,-504 563.5,-504 619.5,-504 619.5,-504 625.5,-504 631.5,-510 631.5,-516 631.5,-516 631.5,-528 631.5,-528 631.5,-534 625.5,-540 619.5,-540"/>
+<text text-anchor="middle" x="591.5" y="-519.5" font-family="sans" font-size="10.00" fill="#000000">sam_to_bam</text>
+</g>
+<!-- 5&#45;&gt;4 -->
+<g id="edge12" class="edge">
+<title>5&#45;&gt;4</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M593.7792,-503.8486C597.0012,-476.5755 602.5,-423.3818 602.5,-378 602.5,-378 602.5,-378 602.5,-306 602.5,-265.8518 604.4653,-219.4451 605.9316,-190.4076"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="609.4394,-190.3468 606.464,-180.1784 602.4489,-189.9829 609.4394,-190.3468"/>
+</g>
+<!-- 15 -->
+<g id="node16" class="node">
+<title>15</title>
+<path fill="none" stroke="#d88d56" stroke-width="2" d="M549.5,-468C549.5,-468 511.5,-468 511.5,-468 505.5,-468 499.5,-462 499.5,-456 499.5,-456 499.5,-444 499.5,-444 499.5,-438 505.5,-432 511.5,-432 511.5,-432 549.5,-432 549.5,-432 555.5,-432 561.5,-438 561.5,-444 561.5,-444 561.5,-456 561.5,-456 561.5,-462 555.5,-468 549.5,-468"/>
+<text text-anchor="middle" x="530.5" y="-447.5" font-family="sans" font-size="10.00" fill="#000000">bam_sort</text>
+</g>
+<!-- 5&#45;&gt;15 -->
+<g id="edge22" class="edge">
+<title>5&#45;&gt;15</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M576.1071,-503.8314C569.0096,-495.454 560.452,-485.3531 552.676,-476.1749"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="555.2348,-473.7807 546.1001,-468.4133 549.8939,-478.3056 555.2348,-473.7807"/>
+</g>
+<!-- 16 -->
+<g id="node17" class="node">
+<title>16</title>
+<path fill="none" stroke="#ced856" stroke-width="2" d="M698.5,-468C698.5,-468 646.5,-468 646.5,-468 640.5,-468 634.5,-462 634.5,-456 634.5,-456 634.5,-444 634.5,-444 634.5,-438 640.5,-432 646.5,-432 646.5,-432 698.5,-432 698.5,-432 704.5,-432 710.5,-438 710.5,-444 710.5,-444 710.5,-456 710.5,-456 710.5,-462 704.5,-468 698.5,-468"/>
+<text text-anchor="middle" x="672.5" y="-447.5" font-family="sans" font-size="10.00" fill="#000000">count_reads</text>
+</g>
+<!-- 5&#45;&gt;16 -->
+<g id="edge23" class="edge">
+<title>5&#45;&gt;16</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M611.9397,-503.8314C621.8205,-495.0485 633.8324,-484.3712 644.545,-474.8489"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="646.918,-477.4224 652.0668,-468.1628 642.2674,-472.1906 646.918,-477.4224"/>
+</g>
+<!-- 6 -->
+<g id="node7" class="node">
+<title>6</title>
+<path fill="none" stroke="#56b9d8" stroke-width="2" d="M613,-612C613,-612 566,-612 566,-612 560,-612 554,-606 554,-600 554,-600 554,-588 554,-588 554,-582 560,-576 566,-576 566,-576 613,-576 613,-576 619,-576 625,-582 625,-588 625,-588 625,-600 625,-600 625,-606 619,-612 613,-612"/>
+<text text-anchor="middle" x="589.5" y="-591.5" font-family="sans" font-size="10.00" fill="#000000">map_reads</text>
+</g>
+<!-- 6&#45;&gt;5 -->
+<g id="edge13" class="edge">
+<title>6&#45;&gt;5</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M590.0047,-575.8314C590.2186,-568.131 590.4729,-558.9743 590.7106,-550.4166"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="594.2094,-550.5066 590.9885,-540.4133 587.2121,-550.3122 594.2094,-550.5066"/>
+</g>
+<!-- 7 -->
+<g id="node8" class="node">
+<title>7</title>
+<path fill="none" stroke="#d8c356" stroke-width="2" d="M638.5,-684C638.5,-684 540.5,-684 540.5,-684 534.5,-684 528.5,-678 528.5,-672 528.5,-672 528.5,-660 528.5,-660 528.5,-654 534.5,-648 540.5,-648 540.5,-648 638.5,-648 638.5,-648 644.5,-648 650.5,-654 650.5,-660 650.5,-660 650.5,-672 650.5,-672 650.5,-678 644.5,-684 638.5,-684"/>
+<text text-anchor="middle" x="589.5" y="-663.5" font-family="sans" font-size="10.00" fill="#000000">build_minimap_index</text>
+</g>
+<!-- 7&#45;&gt;6 -->
+<g id="edge15" class="edge">
+<title>7&#45;&gt;6</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M589.5,-647.8314C589.5,-640.131 589.5,-630.9743 589.5,-622.4166"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="593.0001,-622.4132 589.5,-612.4133 586.0001,-622.4133 593.0001,-622.4132"/>
+</g>
+<!-- 8 -->
+<g id="node9" class="node">
+<title>8</title>
+<path fill="none" stroke="#56d87b" stroke-width="2" d="M708,-756C708,-756 587,-756 587,-756 581,-756 575,-750 575,-744 575,-744 575,-732 575,-732 575,-726 581,-720 587,-720 587,-720 708,-720 708,-720 714,-720 720,-726 720,-732 720,-732 720,-744 720,-744 720,-750 714,-756 708,-756"/>
+<text text-anchor="middle" x="647.5" y="-735.5" font-family="sans" font-size="10.00" fill="#000000">genome_to_transcriptome</text>
+</g>
+<!-- 8&#45;&gt;7 -->
+<g id="edge16" class="edge">
+<title>8&#45;&gt;7</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M632.8642,-719.8314C626.1839,-711.5386 618.1432,-701.557 610.8096,-692.4533"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="613.3319,-690.0052 604.3329,-684.4133 607.8806,-694.3965 613.3319,-690.0052"/>
+</g>
+<!-- 8&#45;&gt;16 -->
+<g id="edge24" class="edge">
+<title>8&#45;&gt;16</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M652.3613,-719.6407C654.9136,-709.2559 657.8464,-695.9871 659.5,-684 669.6747,-610.2433 671.9458,-522.367 672.4137,-478.2434"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="675.9154,-478.0318 672.4975,-468.0034 668.9157,-477.9744 675.9154,-478.0318"/>
+</g>
+<!-- 32 -->
+<g id="node33" class="node">
+<title>32</title>
+<path fill="none" stroke="#56d86b" stroke-width="2" d="M1035.5,-252C1035.5,-252 937.5,-252 937.5,-252 931.5,-252 925.5,-246 925.5,-240 925.5,-240 925.5,-228 925.5,-228 925.5,-222 931.5,-216 937.5,-216 937.5,-216 1035.5,-216 1035.5,-216 1041.5,-216 1047.5,-222 1047.5,-228 1047.5,-228 1047.5,-240 1047.5,-240 1047.5,-246 1041.5,-252 1035.5,-252"/>
+<text text-anchor="middle" x="986.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">generate_gene_query</text>
+</g>
+<!-- 8&#45;&gt;32 -->
+<g id="edge50" class="edge">
+<title>8&#45;&gt;32</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M720.1127,-731.1302C801.5872,-718.9578 922.5,-685.4825 922.5,-594 922.5,-594 922.5,-594 922.5,-378 922.5,-334.0584 947.6932,-288.5709 966.4667,-260.7747"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="969.458,-262.6035 972.2963,-252.3959 963.712,-258.6056 969.458,-262.6035"/>
+</g>
+<!-- 9 -->
+<g id="node10" class="node">
+<title>9</title>
+<path fill="none" stroke="#56d892" stroke-width="2" d="M346,-828C346,-828 293,-828 293,-828 287,-828 281,-822 281,-816 281,-816 281,-804 281,-804 281,-798 287,-792 293,-792 293,-792 346,-792 346,-792 352,-792 358,-798 358,-804 358,-804 358,-816 358,-816 358,-822 352,-828 346,-828"/>
+<text text-anchor="middle" x="319.5" y="-807.5" font-family="sans" font-size="10.00" fill="#000000">get_genome</text>
+</g>
+<!-- 9&#45;&gt;8 -->
+<g id="edge17" class="edge">
+<title>9&#45;&gt;8</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M358.0903,-801.529C408.6679,-790.4266 498.7186,-770.6593 564.9797,-756.1142"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="565.9705,-759.4801 574.9875,-753.9174 564.4696,-752.6429 565.9705,-759.4801"/>
+</g>
+<!-- 22 -->
+<g id="node23" class="node">
+<title>22</title>
+<path fill="none" stroke="#88d856" stroke-width="2" d="M344,-540C344,-540 287,-540 287,-540 281,-540 275,-534 275,-528 275,-528 275,-516 275,-516 275,-510 281,-504 287,-504 287,-504 344,-504 344,-504 350,-504 356,-510 356,-516 356,-516 356,-528 356,-528 356,-534 350,-540 344,-540"/>
+<text text-anchor="middle" x="315.5" y="-519.5" font-family="sans" font-size="10.00" fill="#000000">flair_collapse</text>
+</g>
+<!-- 9&#45;&gt;22 -->
+<g id="edge30" class="edge">
+<title>9&#45;&gt;22</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M280.8583,-797.5239C261.4743,-789.0837 239.7717,-775.7956 228.5,-756 188.2218,-685.2627 253.8678,-592.3726 291.6971,-547.9061"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="294.49,-550.0279 298.4051,-540.183 289.2052,-545.4376 294.49,-550.0279"/>
+</g>
+<!-- 23 -->
+<g id="node24" class="node">
+<title>23</title>
+<path fill="none" stroke="#78d856" stroke-width="2" d="M340.5,-612C340.5,-612 290.5,-612 290.5,-612 284.5,-612 278.5,-606 278.5,-600 278.5,-600 278.5,-588 278.5,-588 278.5,-582 284.5,-576 290.5,-576 290.5,-576 340.5,-576 340.5,-576 346.5,-576 352.5,-582 352.5,-588 352.5,-588 352.5,-600 352.5,-600 352.5,-606 346.5,-612 340.5,-612"/>
+<text text-anchor="middle" x="315.5" y="-591.5" font-family="sans" font-size="10.00" fill="#000000">flair_correct</text>
+</g>
+<!-- 9&#45;&gt;23 -->
+<g id="edge34" class="edge">
+<title>9&#45;&gt;23</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M280.8196,-793.9272C265.0153,-785.2011 248.3867,-772.7203 239.5,-756 214.3938,-708.7629 257.4479,-651.7771 288.3271,-619.5294"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="290.9547,-621.8493 295.4775,-612.2684 285.9671,-616.9377 290.9547,-621.8493"/>
+</g>
+<!-- 24 -->
+<g id="node25" class="node">
+<title>24</title>
+<path fill="none" stroke="#97d856" stroke-width="2" d="M340,-684C340,-684 299,-684 299,-684 293,-684 287,-678 287,-672 287,-672 287,-660 287,-660 287,-654 293,-648 299,-648 299,-648 340,-648 340,-648 346,-648 352,-654 352,-660 352,-660 352,-672 352,-672 352,-678 346,-684 340,-684"/>
+<text text-anchor="middle" x="319.5" y="-663.5" font-family="sans" font-size="10.00" fill="#000000">flair_align</text>
+</g>
+<!-- 9&#45;&gt;24 -->
+<g id="edge37" class="edge">
+<title>9&#45;&gt;24</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M358.1804,-793.9272C373.9847,-785.2011 390.6133,-772.7203 399.5,-756 407.0092,-741.8716 407.0092,-734.1284 399.5,-720 391.2289,-704.438 376.2514,-692.5485 361.4739,-683.9405"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="362.82,-680.6891 352.3604,-679.0014 359.4846,-686.8434 362.82,-680.6891"/>
+</g>
+<!-- 25 -->
+<g id="node26" class="node">
+<title>25</title>
+<path fill="none" stroke="#d8bc56" stroke-width="2" d="M378.5,-756C378.5,-756 260.5,-756 260.5,-756 254.5,-756 248.5,-750 248.5,-744 248.5,-744 248.5,-732 248.5,-732 248.5,-726 254.5,-720 260.5,-720 260.5,-720 378.5,-720 378.5,-720 384.5,-720 390.5,-726 390.5,-732 390.5,-732 390.5,-744 390.5,-744 390.5,-750 384.5,-756 378.5,-756"/>
+<text text-anchor="middle" x="319.5" y="-735.5" font-family="sans" font-size="10.00" fill="#000000">build_flair_genome_index</text>
+</g>
+<!-- 9&#45;&gt;25 -->
+<g id="edge40" class="edge">
+<title>9&#45;&gt;25</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M319.5,-791.8314C319.5,-784.131 319.5,-774.9743 319.5,-766.4166"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="323.0001,-766.4132 319.5,-756.4133 316.0001,-766.4133 323.0001,-766.4132"/>
+</g>
+<!-- 10 -->
+<g id="node11" class="node">
+<title>10</title>
+<path fill="none" stroke="#5673d8" stroke-width="2" d="M630.5,-828C630.5,-828 562.5,-828 562.5,-828 556.5,-828 550.5,-822 550.5,-816 550.5,-816 550.5,-804 550.5,-804 550.5,-798 556.5,-792 562.5,-792 562.5,-792 630.5,-792 630.5,-792 636.5,-792 642.5,-798 642.5,-804 642.5,-804 642.5,-816 642.5,-816 642.5,-822 636.5,-828 630.5,-828"/>
+<text text-anchor="middle" x="596.5" y="-807.5" font-family="sans" font-size="10.00" fill="#000000">standardize_gff</text>
+</g>
+<!-- 10&#45;&gt;8 -->
+<g id="edge18" class="edge">
+<title>10&#45;&gt;8</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M609.3695,-791.8314C615.1835,-783.6232 622.1696,-773.7606 628.5646,-764.7323"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="631.5331,-766.5966 634.4573,-756.4133 625.821,-762.5505 631.5331,-766.5966"/>
+</g>
+<!-- 26 -->
+<g id="node27" class="node">
+<title>26</title>
+<path fill="none" stroke="#56d8d0" stroke-width="2" d="M460,-684C460,-684 421,-684 421,-684 415,-684 409,-678 409,-672 409,-672 409,-660 409,-660 409,-654 415,-648 421,-648 421,-648 460,-648 460,-648 466,-648 472,-654 472,-660 472,-660 472,-672 472,-672 472,-678 466,-684 460,-684"/>
+<text text-anchor="middle" x="440.5" y="-663.5" font-family="sans" font-size="10.00" fill="#000000">gff_to_gtf</text>
+</g>
+<!-- 10&#45;&gt;26 -->
+<g id="edge41" class="edge">
+<title>10&#45;&gt;26</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M585.2052,-791.9205C572.5337,-772.5867 550.5775,-741.7748 526.5,-720 512.8368,-707.6435 495.9995,-696.3776 480.8556,-687.3705"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="482.5785,-684.3237 472.1716,-682.3371 479.0682,-690.38 482.5785,-684.3237"/>
+</g>
+<!-- 11 -->
+<g id="node12" class="node">
+<title>11</title>
+<path fill="none" stroke="#56d88a" stroke-width="2" d="M629.5,-900C629.5,-900 563.5,-900 563.5,-900 557.5,-900 551.5,-894 551.5,-888 551.5,-888 551.5,-876 551.5,-876 551.5,-870 557.5,-864 563.5,-864 563.5,-864 629.5,-864 629.5,-864 635.5,-864 641.5,-870 641.5,-876 641.5,-876 641.5,-888 641.5,-888 641.5,-894 635.5,-900 629.5,-900"/>
+<text text-anchor="middle" x="596.5" y="-879.5" font-family="sans" font-size="10.00" fill="#000000">get_annotation</text>
+</g>
+<!-- 11&#45;&gt;10 -->
+<g id="edge19" class="edge">
+<title>11&#45;&gt;10</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M596.5,-863.8314C596.5,-856.131 596.5,-846.9743 596.5,-838.4166"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="600.0001,-838.4132 596.5,-828.4133 593.0001,-838.4133 600.0001,-838.4132"/>
+</g>
+<!-- 12 -->
+<g id="node13" class="node">
+<title>12</title>
+<path fill="none" stroke="#a7d856" stroke-width="2" d="M506,-756C506,-756 459,-756 459,-756 453,-756 447,-750 447,-744 447,-744 447,-732 447,-732 447,-726 453,-720 459,-720 459,-720 506,-720 506,-720 512,-720 518,-726 518,-732 518,-732 518,-744 518,-744 518,-750 512,-756 506,-756"/>
+<text text-anchor="middle" x="482.5" y="-735.5" font-family="sans" font-size="10.00" fill="#000000">filter_reads</text>
+</g>
+<!-- 12&#45;&gt;6 -->
+<g id="edge14" class="edge">
+<title>12&#45;&gt;6</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M486.9869,-719.6639C492.3616,-700.3917 502.7937,-669.8903 519.5,-648 528.1076,-636.7214 539.6258,-626.5473 550.8303,-618.1505"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="553.198,-620.7595 559.284,-612.087 549.1182,-615.0714 553.198,-620.7595"/>
+</g>
+<!-- 12&#45;&gt;22 -->
+<g id="edge33" class="edge">
+<title>12&#45;&gt;22</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M446.6568,-721.514C430.3346,-712.4516 411.9296,-699.8132 399.5,-684 368.0548,-643.9946 386.8694,-620.1093 361.5,-576 355.8233,-566.1301 348.2473,-556.3461 340.8841,-547.8733"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="343.2544,-545.2753 333.9588,-540.1916 338.0553,-549.9625 343.2544,-545.2753"/>
+</g>
+<!-- 12&#45;&gt;24 -->
+<g id="edge39" class="edge">
+<title>12&#45;&gt;24</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M446.729,-722.1993C421.6062,-711.1021 387.9121,-696.2188 361.5097,-684.5565"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="362.7034,-681.2575 352.1418,-680.4185 359.875,-687.6607 362.7034,-681.2575"/>
+</g>
+<!-- 13 -->
+<g id="node14" class="node">
+<title>13</title>
+<path fill="none" stroke="#d86656" stroke-width="2" d="M543.5,-180C543.5,-180 449.5,-180 449.5,-180 443.5,-180 437.5,-174 437.5,-168 437.5,-168 437.5,-156 437.5,-156 437.5,-150 443.5,-144 449.5,-144 449.5,-144 543.5,-144 543.5,-144 549.5,-144 555.5,-150 555.5,-156 555.5,-156 555.5,-168 555.5,-168 555.5,-174 549.5,-180 543.5,-180"/>
+<text text-anchor="middle" x="496.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">alignment_qa_report</text>
+</g>
+<!-- 13&#45;&gt;0 -->
+<g id="edge11" class="edge">
+<title>13&#45;&gt;0</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M496.5,-143.7623C496.5,-119.201 496.5,-75.2474 496.5,-46.3541"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="500.0001,-46.0896 496.5,-36.0896 493.0001,-46.0897 500.0001,-46.0896"/>
+</g>
+<!-- 14 -->
+<g id="node15" class="node">
+<title>14</title>
+<path fill="none" stroke="#d85656" stroke-width="2" d="M547,-396C547,-396 488,-396 488,-396 482,-396 476,-390 476,-384 476,-384 476,-372 476,-372 476,-366 482,-360 488,-360 488,-360 547,-360 547,-360 553,-360 559,-366 559,-372 559,-372 559,-384 559,-384 559,-390 553,-396 547,-396"/>
+<text text-anchor="middle" x="517.5" y="-375.5" font-family="sans" font-size="10.00" fill="#000000">alignment_qa</text>
+</g>
+<!-- 14&#45;&gt;13 -->
+<g id="edge20" class="edge">
+<title>14&#45;&gt;13</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M515.7457,-359.9555C512.0861,-322.3144 503.6154,-235.1867 499.2343,-190.1246"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="502.7154,-189.7594 498.2641,-180.1451 495.7483,-190.4369 502.7154,-189.7594"/>
+</g>
+<!-- 15&#45;&gt;14 -->
+<g id="edge21" class="edge">
+<title>15&#45;&gt;14</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M527.2196,-431.8314C525.8292,-424.131 524.1759,-414.9743 522.6308,-406.4166"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="526.0458,-405.6322 520.8246,-396.4133 519.1572,-406.8761 526.0458,-405.6322"/>
+</g>
+<!-- 16&#45;&gt;0 -->
+<g id="edge9" class="edge">
+<title>16&#45;&gt;0</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M671.8784,-431.8172C670.9997,-404.504 669.5,-351.2598 669.5,-306 669.5,-306 669.5,-306 669.5,-162 669.5,-91.6597 583.9714,-49.0441 533.1889,-29.9404"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="534.1254,-26.5569 523.5313,-26.4319 531.7351,-33.1362 534.1254,-26.5569"/>
+</g>
+<!-- 17 -->
+<g id="node18" class="node">
+<title>17</title>
+<path fill="none" stroke="#56a9d8" stroke-width="2" d="M799,-396C799,-396 710,-396 710,-396 704,-396 698,-390 698,-384 698,-384 698,-372 698,-372 698,-366 704,-360 710,-360 710,-360 799,-360 799,-360 805,-360 811,-366 811,-372 811,-372 811,-384 811,-384 811,-390 805,-396 799,-396"/>
+<text text-anchor="middle" x="754.5" y="-375.5" font-family="sans" font-size="10.00" fill="#000000">merge_read_counts</text>
+</g>
+<!-- 16&#45;&gt;17 -->
+<g id="edge25" class="edge">
+<title>16&#45;&gt;17</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M693.1921,-431.8314C703.1948,-423.0485 715.355,-412.3712 726.1999,-402.8489"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="728.6094,-405.3909 733.8146,-396.1628 723.9908,-400.1308 728.6094,-405.3909"/>
+</g>
+<!-- 17&#45;&gt;0 -->
+<g id="edge4" class="edge">
+<title>17&#45;&gt;0</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M743.3586,-359.5702C737.6135,-349.3859 730.9065,-336.3312 726.5,-324 712.7431,-285.5025 707.5,-274.8816 707.5,-234 707.5,-234 707.5,-234 707.5,-162 707.5,-120.6022 712.4708,-101.5715 683.5,-72 662.6803,-50.7486 582.0335,-33.1845 533.7089,-24.2891"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="534.092,-20.8019 523.6298,-22.473 532.8507,-27.6909 534.092,-20.8019"/>
+</g>
+<!-- 18 -->
+<g id="node19" class="node">
+<title>18</title>
+<path fill="none" stroke="#bed856" stroke-width="2" d="M819.5,-324C819.5,-324 747.5,-324 747.5,-324 741.5,-324 735.5,-318 735.5,-312 735.5,-312 735.5,-300 735.5,-300 735.5,-294 741.5,-288 747.5,-288 747.5,-288 819.5,-288 819.5,-288 825.5,-288 831.5,-294 831.5,-300 831.5,-300 831.5,-312 831.5,-312 831.5,-318 825.5,-324 819.5,-324"/>
+<text text-anchor="middle" x="783.5" y="-303.5" font-family="sans" font-size="10.00" fill="#000000">diffexp_analysis</text>
+</g>
+<!-- 17&#45;&gt;18 -->
+<g id="edge26" class="edge">
+<title>17&#45;&gt;18</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M761.8179,-359.8314C764.9876,-351.9617 768.77,-342.5712 772.2813,-333.8533"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="775.5939,-334.9968 776.0835,-324.4133 769.1008,-332.3815 775.5939,-334.9968"/>
+</g>
+<!-- 18&#45;&gt;0 -->
+<g id="edge6" class="edge">
+<title>18&#45;&gt;0</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M776.5142,-287.6602C772.6957,-277.2814 768.0377,-264.0123 764.5,-252 741.2633,-173.0995 779.6297,-130.1908 721.5,-72 695.3741,-45.8466 590.5212,-29.3878 533.6423,-22.2017"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="533.9624,-18.7148 523.6099,-20.9675 533.1077,-25.6624 533.9624,-18.7148"/>
+</g>
+<!-- 18&#45;&gt;32 -->
+<g id="edge49" class="edge">
+<title>18&#45;&gt;32</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M831.6082,-288.937C859.8557,-278.9182 895.8159,-266.1638 925.9756,-255.4668"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="927.292,-258.7136 935.5468,-252.0721 924.952,-252.1162 927.292,-258.7136"/>
+</g>
+<!-- 19 -->
+<g id="node20" class="node">
+<title>19</title>
+<path fill="none" stroke="#68d856" stroke-width="2" d="M434.5,-396C434.5,-396 384.5,-396 384.5,-396 378.5,-396 372.5,-390 372.5,-384 372.5,-384 372.5,-372 372.5,-372 372.5,-366 378.5,-360 384.5,-360 384.5,-360 434.5,-360 434.5,-360 440.5,-360 446.5,-366 446.5,-372 446.5,-372 446.5,-384 446.5,-384 446.5,-390 440.5,-396 434.5,-396"/>
+<text text-anchor="middle" x="409.5" y="-375.5" font-family="sans" font-size="10.00" fill="#000000">flair_diffexp</text>
+</g>
+<!-- 19&#45;&gt;0 -->
+<g id="edge1" class="edge">
+<title>19&#45;&gt;0</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M409.5,-359.8146C409.5,-332.4983 409.5,-279.25 409.5,-234 409.5,-234 409.5,-234 409.5,-162 409.5,-115.2117 443.5641,-70.5954 469.0622,-43.7524"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="471.8267,-45.9318 476.3218,-36.3378 466.8249,-41.0346 471.8267,-45.9318"/>
+</g>
+<!-- 28 -->
+<g id="node29" class="node">
+<title>28</title>
+<path fill="none" stroke="#61d856" stroke-width="2" d="M368,-324C368,-324 285,-324 285,-324 279,-324 273,-318 273,-312 273,-312 273,-300 273,-300 273,-294 279,-288 285,-288 285,-288 368,-288 368,-288 374,-288 380,-294 380,-300 380,-300 380,-312 380,-312 380,-318 374,-324 368,-324"/>
+<text text-anchor="middle" x="326.5" y="-303.5" font-family="sans" font-size="10.00" fill="#000000">flair_plot_isoforms</text>
+</g>
+<!-- 19&#45;&gt;28 -->
+<g id="edge43" class="edge">
+<title>19&#45;&gt;28</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M388.5556,-359.8314C378.4309,-351.0485 366.1223,-340.3712 355.1452,-330.8489"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="357.2851,-328.0718 347.4377,-324.1628 352.6981,-333.3595 357.2851,-328.0718"/>
+</g>
+<!-- 20 -->
+<g id="node21" class="node">
+<title>20</title>
+<path fill="none" stroke="#56d85b" stroke-width="2" d="M421,-468C421,-468 364,-468 364,-468 358,-468 352,-462 352,-456 352,-456 352,-444 352,-444 352,-438 358,-432 364,-432 364,-432 421,-432 421,-432 427,-432 433,-438 433,-444 433,-444 433,-456 433,-456 433,-462 427,-468 421,-468"/>
+<text text-anchor="middle" x="392.5" y="-447.5" font-family="sans" font-size="10.00" fill="#000000">flair_quantify</text>
+</g>
+<!-- 20&#45;&gt;19 -->
+<g id="edge27" class="edge">
+<title>20&#45;&gt;19</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M396.7898,-431.8314C398.608,-424.131 400.7699,-414.9743 402.7905,-406.4166"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="406.2608,-406.95 405.1524,-396.4133 399.4481,-405.3414 406.2608,-406.95"/>
+</g>
+<!-- 20&#45;&gt;28 -->
+<g id="edge45" class="edge">
+<title>20&#45;&gt;28</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M382.0919,-431.7457C376.3433,-421.3936 369.2285,-408.1228 363.5,-396 353.756,-375.3796 343.9995,-351.479 336.9534,-333.4922"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="340.2153,-332.2232 333.335,-324.1664 333.6893,-334.7553 340.2153,-332.2232"/>
+</g>
+<!-- 21 -->
+<g id="node22" class="node">
+<title>21</title>
+<path fill="none" stroke="#569ad8" stroke-width="2" d="M454.5,-540C454.5,-540 386.5,-540 386.5,-540 380.5,-540 374.5,-534 374.5,-528 374.5,-528 374.5,-516 374.5,-516 374.5,-510 380.5,-504 386.5,-504 386.5,-504 454.5,-504 454.5,-504 460.5,-504 466.5,-510 466.5,-516 466.5,-516 466.5,-528 466.5,-528 466.5,-534 460.5,-540 454.5,-540"/>
+<text text-anchor="middle" x="420.5" y="-519.5" font-family="sans" font-size="10.00" fill="#000000">reads_manifest</text>
+</g>
+<!-- 21&#45;&gt;20 -->
+<g id="edge29" class="edge">
+<title>21&#45;&gt;20</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M413.4344,-503.8314C410.374,-495.9617 406.7221,-486.5712 403.3318,-477.8533"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="406.5473,-476.4647 399.6607,-468.4133 400.0232,-479.0019 406.5473,-476.4647"/>
+</g>
+<!-- 22&#45;&gt;20 -->
+<g id="edge28" class="edge">
+<title>22&#45;&gt;20</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M334.9304,-503.8314C344.232,-495.1337 355.5204,-484.5783 365.6286,-475.1265"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="368.1621,-477.5493 373.0759,-468.1628 363.3811,-472.4363 368.1621,-477.5493"/>
+</g>
+<!-- 22&#45;&gt;28 -->
+<g id="edge44" class="edge">
+<title>22&#45;&gt;28</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M316.4189,-503.9555C318.3318,-466.3938 322.7542,-379.5541 325.0532,-334.4103"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="328.5627,-334.3102 325.5759,-324.1451 321.5718,-333.9541 328.5627,-334.3102"/>
+</g>
+<!-- 23&#45;&gt;22 -->
+<g id="edge32" class="edge">
+<title>23&#45;&gt;22</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M315.5,-575.8314C315.5,-568.131 315.5,-558.9743 315.5,-550.4166"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="319.0001,-550.4132 315.5,-540.4133 312.0001,-550.4133 319.0001,-550.4132"/>
+</g>
+<!-- 24&#45;&gt;23 -->
+<g id="edge35" class="edge">
+<title>24&#45;&gt;23</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M318.4906,-647.8314C318.0628,-640.131 317.5541,-630.9743 317.0787,-622.4166"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="320.5724,-622.2037 316.523,-612.4133 313.5831,-622.592 320.5724,-622.2037"/>
+</g>
+<!-- 25&#45;&gt;24 -->
+<g id="edge38" class="edge">
+<title>25&#45;&gt;24</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M319.5,-719.8314C319.5,-712.131 319.5,-702.9743 319.5,-694.4166"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="323.0001,-694.4132 319.5,-684.4133 316.0001,-694.4133 323.0001,-694.4132"/>
+</g>
+<!-- 26&#45;&gt;22 -->
+<g id="edge31" class="edge">
+<title>26&#45;&gt;22</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M434.315,-647.8529C427.046,-628.4613 413.5158,-597.6004 394.5,-576 384.2428,-564.3487 370.949,-554.0088 358.2016,-545.5753"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="359.8247,-542.4597 349.5078,-540.0485 356.0692,-548.3671 359.8247,-542.4597"/>
+</g>
+<!-- 26&#45;&gt;23 -->
+<g id="edge36" class="edge">
+<title>26&#45;&gt;23</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M408.9572,-647.8314C392.8209,-638.5368 373.0014,-627.1208 355.7728,-617.1971"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="357.445,-614.1212 347.0327,-612.1628 353.9511,-620.187 357.445,-614.1212"/>
+</g>
+<!-- 27 -->
+<g id="node28" class="node">
+<title>27</title>
+<path fill="none" stroke="#56d8d8" stroke-width="2" d="M369,-252C369,-252 282,-252 282,-252 276,-252 270,-246 270,-240 270,-240 270,-228 270,-228 270,-222 276,-216 282,-216 282,-216 369,-216 369,-216 375,-216 381,-222 381,-228 381,-228 381,-240 381,-240 381,-246 375,-252 369,-252"/>
+<text text-anchor="middle" x="325.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_analysis_report</text>
+</g>
+<!-- 27&#45;&gt;0 -->
+<g id="edge10" class="edge">
+<title>27&#45;&gt;0</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M329.1993,-215.7908C336.6167,-183.3244 356.083,-114.8373 395.5,-72 413.0131,-52.9672 438.8053,-39.3343 459.9415,-30.5277"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="461.277,-33.7633 469.2787,-26.8189 458.6929,-27.2577 461.277,-33.7633"/>
+</g>
+<!-- 28&#45;&gt;27 -->
+<g id="edge42" class="edge">
+<title>28&#45;&gt;27</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M326.2477,-287.8314C326.1407,-280.131 326.0135,-270.9743 325.8947,-262.4166"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="329.3944,-262.3637 325.7557,-252.4133 322.3951,-262.4609 329.3944,-262.3637"/>
+</g>
+<!-- 29 -->
+<g id="node30" class="node">
+<title>29</title>
+<path fill="none" stroke="#56d8b1" stroke-width="2" d="M878.5,-108C878.5,-108 792.5,-108 792.5,-108 786.5,-108 780.5,-102 780.5,-96 780.5,-96 780.5,-84 780.5,-84 780.5,-78 786.5,-72 792.5,-72 792.5,-72 878.5,-72 878.5,-72 884.5,-72 890.5,-78 890.5,-84 890.5,-84 890.5,-96 890.5,-96 890.5,-102 884.5,-108 878.5,-108"/>
+<text text-anchor="middle" x="835.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">get_protein_names</text>
+</g>
+<!-- 29&#45;&gt;0 -->
+<g id="edge2" class="edge">
+<title>29&#45;&gt;0</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M780.3369,-76.7297C773.3306,-75.1047 766.2524,-73.4925 759.5,-72 679.2363,-54.2593 584.7507,-35.3354 533.7257,-25.2772"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="534.3051,-21.8241 523.8175,-23.3279 532.9538,-28.6925 534.3051,-21.8241"/>
+</g>
+<!-- 30 -->
+<g id="node31" class="node">
+<title>30</title>
+<path fill="none" stroke="#56c9d8" stroke-width="2" d="M901.5,-180C901.5,-180 785.5,-180 785.5,-180 779.5,-180 773.5,-174 773.5,-168 773.5,-168 773.5,-156 773.5,-156 773.5,-150 779.5,-144 785.5,-144 785.5,-144 901.5,-144 901.5,-144 907.5,-144 913.5,-150 913.5,-156 913.5,-156 913.5,-168 913.5,-168 913.5,-174 907.5,-180 901.5,-180"/>
+<text text-anchor="middle" x="843.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">lambda_gene_annotation</text>
+</g>
+<!-- 30&#45;&gt;29 -->
+<g id="edge46" class="edge">
+<title>30&#45;&gt;29</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M841.4813,-143.8314C840.6257,-136.131 839.6083,-126.9743 838.6574,-118.4166"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="842.1289,-117.9656 837.5459,-108.4133 835.1717,-118.7386 842.1289,-117.9656"/>
+</g>
+<!-- 31 -->
+<g id="node32" class="node">
+<title>31</title>
+<path fill="none" stroke="#56d8a2" stroke-width="2" d="M895.5,-252C895.5,-252 785.5,-252 785.5,-252 779.5,-252 773.5,-246 773.5,-240 773.5,-240 773.5,-228 773.5,-228 773.5,-222 779.5,-216 785.5,-216 785.5,-216 895.5,-216 895.5,-216 901.5,-216 907.5,-222 907.5,-228 907.5,-228 907.5,-240 907.5,-240 907.5,-246 901.5,-252 895.5,-252"/>
+<text text-anchor="middle" x="840.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">get_indexed_protein_db</text>
+</g>
+<!-- 31&#45;&gt;30 -->
+<g id="edge48" class="edge">
+<title>31&#45;&gt;30</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M841.257,-215.8314C841.5779,-208.131 841.9594,-198.9743 842.316,-190.4166"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="845.8133,-190.5503 842.7328,-180.4133 838.8194,-190.2589 845.8133,-190.5503"/>
+</g>
+<!-- 32&#45;&gt;30 -->
+<g id="edge47" class="edge">
+<title>32&#45;&gt;30</title>
+<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M950.4151,-215.8314C931.7011,-206.4089 908.6568,-194.8062 888.76,-184.7883"/>
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="890.0792,-181.5339 879.5734,-180.1628 886.9311,-187.7861 890.0792,-181.5339"/>
+</g>
+</g>
+</svg>