@@ -87,14 +87,46 @@ void Hashtable::consume_seqfile_banding(
8787 n_consumed);
8888}
8989
90+ template <typename SeqIO>
91+ void Hashtable::consume_seqfile_with_mask (
92+ std::string const &filename,
93+ Hashtable* mask,
94+ unsigned int threshold,
95+ unsigned int &total_reads,
96+ unsigned long long &n_consumed
97+ )
98+ {
99+ ReadParserPtr<SeqIO> parser = get_parser<SeqIO>(filename);
100+ consume_seqfile_with_mask<SeqIO>(
101+ parser, mask, threshold, total_reads, n_consumed
102+ );
103+ }
104+
105+ template <typename SeqIO>
106+ void Hashtable::consume_seqfile_banding_with_mask (
107+ std::string const &filename,
108+ unsigned int num_bands,
109+ unsigned int band,
110+ Hashtable* mask,
111+ unsigned int threshold,
112+ unsigned int &total_reads,
113+ unsigned long long &n_consumed
114+ )
115+ {
116+ ReadParserPtr<SeqIO> parser = get_parser<SeqIO>(filename);
117+ consume_seqfile_banding_with_mask<SeqIO>(
118+ parser, num_bands, band, mask, threshold, total_reads, n_consumed
119+ );
120+ }
121+
90122template <typename SeqIO>
91123void Hashtable::consume_seqfile (
92124 ReadParserPtr<SeqIO>& parser,
93125 unsigned int &total_reads,
94126 unsigned long long &n_consumed
95127)
96128{
97- Read read;
129+ Read read;
98130
99131 // Iterate through the reads and consume their k-mers.
100132 while (!parser->is_complete ( )) {
@@ -114,6 +146,43 @@ void Hashtable::consume_seqfile(
114146
115147} // consume_seqfile
116148
149+ template <typename SeqIO>
150+ void Hashtable::consume_seqfile_with_mask (
151+ ReadParserPtr<SeqIO>& parser,
152+ Hashtable* mask,
153+ unsigned int threshold,
154+ unsigned int &total_reads,
155+ unsigned long long &n_consumed
156+ )
157+ {
158+ Read read;
159+
160+ // Iterate through the reads and consume their k-mers.
161+ while (!parser->is_complete ( )) {
162+ try {
163+ read = parser->get_next_read ( );
164+ } catch (NoMoreReadsAvailable) {
165+ break ;
166+ }
167+
168+ read.set_clean_seq ();
169+ unsigned int this_n_consumed = 0 ;
170+ KmerHashIteratorPtr kmers = new_kmer_iterator (read.cleaned_seq );
171+ while (!kmers->done ()) {
172+ HashIntoType kmer = kmers->next ();
173+ if (mask->get_count (kmer) <= threshold) {
174+ count (kmer);
175+ this_n_consumed++;
176+ }
177+ }
178+
179+ __sync_add_and_fetch ( &n_consumed, this_n_consumed );
180+ __sync_add_and_fetch ( &total_reads, 1 );
181+
182+ } // while reads left for parser
183+
184+ } // consume_seqfile_with_mask
185+
117186template <typename SeqIO>
118187void Hashtable::consume_seqfile_banding (
119188 ReadParserPtr<SeqIO>& parser,
@@ -152,6 +221,49 @@ void Hashtable::consume_seqfile_banding(
152221
153222} // consume_seqfile_banding
154223
224+ template <typename SeqIO>
225+ void Hashtable::consume_seqfile_banding_with_mask (
226+ ReadParserPtr<SeqIO>& parser,
227+ unsigned int num_bands,
228+ unsigned int band,
229+ Hashtable* mask,
230+ unsigned int threshold,
231+ unsigned int &total_reads,
232+ unsigned long long &n_consumed
233+ )
234+ {
235+ Read read;
236+ std::pair<uint64_t , uint64_t > interval = compute_band_interval (num_bands,
237+ band);
238+ std::cerr << " DEBUGGGG threshold=" << threshold << ' \n ' ;
239+
240+ while (!parser->is_complete ()) {
241+ try {
242+ read = parser->get_next_read ( );
243+ } catch (NoMoreReadsAvailable) {
244+ break ;
245+ }
246+
247+ read.set_clean_seq ();
248+ unsigned int this_n_consumed = 0 ;
249+ KmerHashIteratorPtr kmers = new_kmer_iterator (read.cleaned_seq );
250+ while (!kmers->done ()) {
251+ HashIntoType kmer = kmers->next ();
252+ if (kmer >= interval.first && kmer < interval.second ) {
253+ if (mask->get_count (kmer) <= threshold) {
254+ count (kmer);
255+ this_n_consumed++;
256+ }
257+ }
258+ }
259+
260+ __sync_add_and_fetch ( &n_consumed, this_n_consumed );
261+ __sync_add_and_fetch ( &total_reads, 1 );
262+
263+ } // while reads left for parser
264+
265+ } // consume_seqfile_banding_with_mask
266+
155267//
156268// consume_string: run through every k-mer in the given string, & hash it.
157269//
@@ -528,6 +640,44 @@ template void Hashtable::consume_seqfile_banding<FastxReader>(
528640 unsigned long long &n_consumed
529641);
530642
643+ template void Hashtable::consume_seqfile_with_mask<FastxReader>(
644+ std::string const &filename,
645+ Hashtable* mask,
646+ unsigned int threshold,
647+ unsigned int &total_reads,
648+ unsigned long long &n_consumed
649+ );
650+
651+
652+ template void Hashtable::consume_seqfile_with_mask<FastxReader>(
653+ ReadParserPtr<FastxReader>& parser,
654+ Hashtable* mask,
655+ unsigned int threshold,
656+ unsigned int &total_reads,
657+ unsigned long long &n_consumed
658+ );
659+
660+ template void Hashtable::consume_seqfile_banding_with_mask<FastxReader>(
661+ std::string const &filename,
662+ unsigned int num_bands,
663+ unsigned int bands,
664+ Hashtable* mask,
665+ unsigned int threshold,
666+ unsigned int &total_reads,
667+ unsigned long long &n_consumed
668+ );
669+
670+
671+ template void Hashtable::consume_seqfile_banding_with_mask<FastxReader>(
672+ ReadParserPtr<FastxReader>& parser,
673+ unsigned int num_bands,
674+ unsigned int bands,
675+ Hashtable* mask,
676+ unsigned int threshold,
677+ unsigned int &total_reads,
678+ unsigned long long &n_consumed
679+ );
680+
531681
532682template uint64_t * Hashtable::abundance_distribution<FastxReader>(
533683 ReadParserPtr<FastxReader>& parser,
0 commit comments