1919#include < cmath>
2020#include < sstream>
2121#include < cstdlib>
22+ #include < cassert>
2223
2324using std::string;
2425using std::vector;
@@ -37,6 +38,8 @@ using std::ostringstream;
3738using std::istringstream;
3839using std::getline;
3940
// Shorthand alias for std::numeric_limits<T>. In this file it is used
// as num_lim<double>::min() inside std::max(...) to keep denominators
// away from zero. For double, min() is the smallest positive normal
// value, not the most negative value.
// NOTE(review): std::numeric_limits lives in <limits>; confirm that
// header is included, since it is not among the includes visible here.
41+ template <typename T> using num_lim = std::numeric_limits<T>;
42+
4043/* ****************************************************************************/
4144/* ****************** AUX FUNCTIONS *******************************************/
4245/* ****************************************************************************/
@@ -97,7 +100,7 @@ make_exponential_base_groups(vector<BaseGroup> &base_groups,
97100/* ************ LINEAR BASE GROUP *************/
98101// aux function to get linear interval
99102size_t
100- get_linear_interval (const size_t & num_bases) {
103+ get_linear_interval (const size_t num_bases) {
101104 // Treat the first 9bp as individual residues since odd stuff
102105 // can happen there, then we find a grouping value which gives
103106 // us a total set of groups below 75. We limit the intervals
@@ -174,7 +177,8 @@ double get_corrected_count(size_t count_at_limit,
174177 size_t num_reads,
175178 size_t dup_level,
176179 size_t num_obs) {
177- // See if we can bail out early
180+ // See if we can bail out early (ADS: can we know if num_reads <=
181+ // count_at_limit always holds?)
178182 if (count_at_limit == num_reads)
179183 return num_obs;
180184
@@ -210,7 +214,7 @@ double get_corrected_count(size_t count_at_limit,
210214
211215 // Now we can assume that the number we observed can be
212216 // scaled up by this proportion
213- return num_obs/( 1 - p_not_seeing);
217+ return num_obs/std::max (num_lim< double >:: min (), 1.0 - p_not_seeing);
214218}
215219
216220// Function to calculate the deviation of a histogram with 100 bins from a
@@ -277,7 +281,8 @@ sum_deviation_from_normal(const array <double, 101> &gc_count,
277281 // centre of the model
278282 mode = first_mode;
279283 } else {
280- mode /= mode_duplicates;
284+ // ADS: check if we need to avoid divide-by-zero here
285+ mode /= std::max (static_cast <size_t >(1 ), mode_duplicates);
281286 }
282287
283288 // We can now work out a theoretical distribution
@@ -286,7 +291,8 @@ sum_deviation_from_normal(const array <double, 101> &gc_count,
286291 stdev += (i - mode) * (i - mode) * gc_count[i];
287292 }
288293
289- stdev = stdev / (total_count-1 );
294+ // ADS: check if we need to avoid divide-by-zero here
295+ stdev = stdev / std::max (num_lim<double >::min (), total_count - 1.0 );
290296 stdev = sqrt (stdev);
291297
292298 /* ****************** END COPIED FROM FASTQC **********************/
@@ -297,20 +303,24 @@ sum_deviation_from_normal(const array <double, 101> &gc_count,
297303 // ADS: lonely magic below; what is the 100?
298304 for (size_t i = 0 ; i <= 100 ; ++i) {
299305 z = i - mode;
306+ // ADS: check if we need to avoid divide-by-zero here
300307 theoretical[i] = exp (- (z*z)/ (2.0 * stdev *stdev));
301308 theoretical_sum += theoretical[i];
302309 }
303310
304311 // Normalize theoretical so it sums to the total of reads
305312 for (size_t i = 0 ; i <= 100 ; ++i) {
306- theoretical[i] = theoretical[i] * total_count / theoretical_sum;
313+ // ADS: check if we need to avoid divide-by-zero here
314+ theoretical[i] = theoretical[i] * total_count /
315+ std::max (num_lim<double >::min (), theoretical_sum);
307316 }
308317
309318 for (size_t i = 0 ; i <= 100 ; ++i) {
310319 ans += fabs (gc_count[i] - theoretical[i]);
311320 }
312- // Fractional deviation
313- return 100.0 * ans / total_count;
321+ // Fractional deviation (ADS: check if we need to avoid
322+ // divide-by-zero here)
323+ return 100.0 * ans / std::max (num_lim<double >::min (), total_count);
314324}
315325
316326/* **************************************************************/
@@ -446,15 +456,16 @@ ModuleBasicStatistics::summarize_module(FastqStats &stats) {
446456 total_bases += i * stats.long_read_length_freq [i - FastqStats::SHORT_READ_THRESHOLD];
447457 }
448458
449- avg_read_length = total_bases / total_sequences;
459+ avg_read_length =
460+ total_bases / std::max (static_cast <size_t >(1 ), total_sequences);
450461
451462 // counts bases G and C in each base position
452463 avg_gc = 0 ;
453464
454465 // GC %
455466 // GS: TODO delete gc calculation during stream and do it using the total G
456467 // counts in all bases
457- avg_gc = 100 * stats.total_gc / static_cast <double >(total_bases);
468+ avg_gc = 100 * stats.total_gc / std::max ( 1.0 , static_cast <double >(total_bases) );
458469
459470}
460471
@@ -692,6 +703,7 @@ ModulePerBaseSequenceQuality::summarize_module(FastqStats &stats) {
692703 }
693704
694705 const size_t base_positions = base_groups[group].end - base_groups[group].start + 1 ;
706+ assert (base_positions != static_cast <size_t >(0 ));
695707 group_mean[group] = mean_group_sum / base_positions;
696708 group_ldecile[group] = static_cast <double >(ldecile_group_sum) / base_positions;
697709 group_lquartile[group] = static_cast <double >(lquartile_group_sum) / base_positions;
@@ -819,17 +831,19 @@ ModulePerTileSequenceQuality::summarize_module(FastqStats &stats) {
819831
820832 // Now transform sum into mean
821833 for (size_t i = 0 ; i < max_read_length; ++i)
822- if (position_counts[i] > 0 )
834+ if (position_counts[i] > 0.0 )
823835 mean_in_base[i] = mean_in_base[i] / position_counts[i];
824836 else
825- mean_in_base[i] = 0 ;
837+ mean_in_base[i] = 0.0 ;
826838
827839 for (auto &v : tile_position_quality) {
828840 const size_t lim = v.second .size ();
829841 for (size_t i = 0 ; i < lim; ++i) {
830842 // transform sum of all qualities in mean
831- const size_t count_at_pos =
832- stats.tile_position_count .find (v.first )->second [i];
843+ const auto itr = stats.tile_position_count .find (v.first );
844+ if (itr == cend (stats.tile_position_count ))
845+ throw runtime_error (" failure ModulePerTileSequenceQuality::summarize_module" );
846+ const size_t count_at_pos = itr->second [i];
833847
834848 if (count_at_pos > 0 )
835849 v.second [i] = v.second [i] / count_at_pos;
@@ -882,6 +896,7 @@ ModulePerTileSequenceQuality::write_module(ostream &os) {
882896
// Discretizes val onto a grid of step 1/num_quantiles: the product
// val * num_quantiles is truncated toward zero by the int cast and the
// result is divided by num_quantiles again in floating point.
//   val           - value to discretize
//   num_quantiles - number of steps per unit
// Returns the truncated multiple of 1/num_quantiles as a double.
883897inline double
884898round_quantile (const double val, const double num_quantiles) {
899+ // ADS: check if we need to worry about divide by zero here
// The division is double / double, so num_quantiles == 0 with a finite
// val evaluates 0 / 0.0 rather than an integer division by zero.
// NOTE(review): the int cast assumes val * num_quantiles is finite and
// fits in int; confirm callers never pass NaN or infinity.
885900 return static_cast <int >(val * num_quantiles) / num_quantiles;
886901}
887902
@@ -937,6 +952,7 @@ ModulePerTileSequenceQuality::make_html_data() {
937952 // We will now discretize the quantiles so plotly understands
938953 // the color scheme
939954 static const double num_quantiles = 20.0 ;
955+ // ADS: not sure if we need to worry about divide by zero here?
940956 double mid_point = round_quantile (min_val/(min_val - max_val), num_quantiles);
941957
942958 // - 10: red
@@ -1054,7 +1070,7 @@ Module(ModulePerBaseSequenceContent::module_name) {
10541070void
10551071ModulePerBaseSequenceContent::summarize_module (FastqStats &stats) {
10561072 double a_group, t_group, g_group, c_group, n_group;
1057- double a_pos, t_pos, g_pos, c_pos, n_pos;
1073+ double a_pos{} , t_pos{} , g_pos{} , c_pos{} , n_pos{} ;
10581074 double total; // a+c+t+g+n
10591075 max_diff = 0.0 ;
10601076
@@ -1105,10 +1121,10 @@ ModulePerBaseSequenceContent::summarize_module(FastqStats &stats) {
11051121
11061122 const double total_pos =
11071123 static_cast <double >(a_pos + c_pos + g_pos + t_pos + n_pos);
1108- a_pos = 100.0 * a_pos / total_pos;
1109- c_pos = 100.0 * c_pos / total_pos;
1110- g_pos = 100.0 * g_pos / total_pos;
1111- t_pos = 100.0 * t_pos / total_pos;
1124+ a_pos = 100.0 * a_pos / std::max (num_lim< double >:: min (), total_pos) ;
1125+ c_pos = 100.0 * c_pos / std::max (num_lim< double >:: min (), total_pos) ;
1126+ g_pos = 100.0 * g_pos / std::max (num_lim< double >:: min (), total_pos) ;
1127+ t_pos = 100.0 * t_pos / std::max (num_lim< double >:: min (), total_pos) ;
11121128
11131129 // for WGBS, we only test non-bisulfite treated bases
11141130 if (!is_reverse_complement)
@@ -1135,11 +1151,10 @@ ModulePerBaseSequenceContent::summarize_module(FastqStats &stats) {
11351151
11361152 // turns above values to percent
11371153 total = static_cast <double >(a_group + c_group + t_group + g_group + n_group);
1138- a_pct[group] = 100.0 *a_group / total;
1139- c_pct[group] = 100.0 *c_group / total;
1140- g_pct[group] = 100.0 *g_group / total;
1141- t_pct[group] = 100.0 *t_group / total;
1142-
1154+ a_pct[group] = 100.0 *a_group / std::max (num_lim<double >::min (), total);
1155+ c_pct[group] = 100.0 *c_group / std::max (num_lim<double >::min (), total);
1156+ g_pct[group] = 100.0 *g_group / std::max (num_lim<double >::min (), total);
1157+ t_pct[group] = 100.0 *t_group / std::max (num_lim<double >::min (), total);
11431158 }
11441159}
11451160
@@ -1395,12 +1410,14 @@ ModulePerBaseNContent::summarize_module(FastqStats &stats) {
13951410
13961411 this_n_total = (i < FastqStats::SHORT_READ_THRESHOLD) ? (stats.cumulative_read_length_freq [i]) :
13971412 (stats.long_cumulative_read_length_freq [i - FastqStats::SHORT_READ_THRESHOLD]);
1398- this_n_pct = this_n_cnt / static_cast <double >(this_n_total);
1413+ this_n_pct = this_n_cnt / std::max (num_lim<double >::min (),
1414+ static_cast <double >(this_n_total));
13991415 max_n_pct = max (max_n_pct, this_n_pct);
14001416 group_n_cnt += this_n_cnt;
14011417 group_n_total += this_n_total;
14021418 }
1403- n_pct[group] = 100.0 *group_n_cnt / static_cast <double >(group_n_total);
1419+ n_pct[group] = 100.0 *group_n_cnt / std::max (num_lim<double >::min (),
1420+ static_cast <double >(group_n_total));
14041421 }
14051422}
14061423
@@ -1627,15 +1644,15 @@ ModuleSequenceDuplicationLevels::summarize_module(FastqStats &stats) {
16271644 }
16281645
16291646 // "Sequence duplication estimate" in the summary
1630- total_deduplicated_pct = 100.0 * seq_dedup / seq_total;
1647+ total_deduplicated_pct = 100.0 * seq_dedup / std::max ( 1.0 , seq_total) ;
16311648
16321649 // Convert to percentage
16331650 for (auto &v : percentage_deduplicated)
1634- v = 100.0 * v / seq_dedup; // Percentage of unique sequences in bin
1651+ v = 100.0 * v / std::max ( 1.0 , seq_dedup) ; // Percentage of unique sequences in bin
16351652
16361653 // Convert to percentage
16371654 for (auto &v : percentage_total)
1638- v = 100.0 * v / seq_total; // Percentage of sequences in bin
1655+ v = 100.0 * v / std::max ( 1.0 , seq_total) ; // Percentage of sequences in bin
16391656}
16401657
16411658void
@@ -1796,7 +1813,7 @@ ModuleOverrepresentedSequences::make_grade() {
17961813 // implement pass warn fail for overrep sequences
17971814 if (grade != " fail" ) {
17981815 // get percentage that overrep reads represent
1799- double pct = 100.0 * seq.second / num_reads;
1816+ double pct = 100.0 * seq.second / std::max ( static_cast < size_t >( 1 ), num_reads) ;
18001817 if (pct > grade_error) {
18011818 grade = " fail" ;
18021819 }
@@ -1813,7 +1830,7 @@ ModuleOverrepresentedSequences::write_module(ostream &os) {
18131830 os << " #Sequence\t Count\t Percentage\t Possible Source\n " ;
18141831 for (auto seq : overrep_sequences) {
18151832 os << seq.first << " \t " << seq.second << " \t " <<
1816- 100.0 * seq.second / num_reads << " \t "
1833+ 100.0 * seq.second / std::max ( static_cast < size_t >( 1 ), num_reads) << " \t "
18171834 << get_matching_contaminant (seq.first ) << " \n " ;
18181835 }
18191836}
@@ -1836,7 +1853,7 @@ ModuleOverrepresentedSequences::make_html_data() {
18361853 for (auto v : overrep_sequences) {
18371854 data << " <tr><td>" << v.first << " </td>" ;
18381855 data << " <td>" << v.second << " </td>" ;
1839- data << " <td>" << 100.0 * v.second / num_reads << " </td>" ;
1856+ data << " <td>" << 100.0 * v.second / std::max ( static_cast < size_t >( 1 ), num_reads) << " </td>" ;
18401857 data << " <td>" << get_matching_contaminant (v.first )
18411858 << " </td>" ;
18421859 data << " </tr>" ;
@@ -1907,7 +1924,8 @@ ModuleAdapterContent::summarize_module(FastqStats &stats) {
19071924 for (size_t i = 0 ; i < adapter_pos_pct.size (); ++i) {
19081925 for (size_t j = 0 ; j < adapter_pos_pct[0 ].size (); ++j) {
19091926 adapter_pos_pct[i][j] *= 100.0 ;
1910- adapter_pos_pct[i][j] /= static_cast <double >(stats.num_reads );
1927+ adapter_pos_pct[i][j] /= std::max (num_lim<double >::min (),
1928+ static_cast <double >(stats.num_reads ));
19111929 }
19121930 }
19131931 }
@@ -2077,7 +2095,8 @@ ModuleKmerContent::summarize_module(FastqStats &stats) {
20772095 observed_count =
20782096 stats.kmer_count [(i << Constants::bit_shift_kmer) | kmer];
20792097
2080- expected_count = pos_kmer_count[i] / dividend;
2098+ expected_count = pos_kmer_count[i] / std::max (num_lim<double >::min (), dividend);
2099+ // ADS: below, denom can't be zero if not above?
20812100 obs_exp_ratio = (expected_count > 0 ) ? (observed_count / expected_count) : 0 ;
20822101
20832102 if (i == 0 || obs_exp_ratio > obs_exp_max[kmer]) {
@@ -2146,7 +2165,7 @@ ModuleKmerContent::make_html_data() {
21462165
21472166 for (size_t i = 0 ; i < lim; ++i) {
21482167 const size_t kmer = kmers_to_report[i].first ;
2149- const double log_obs_exp = log (kmers_to_report[i].second )/log (2 );
2168+ const double log_obs_exp = log (kmers_to_report[i].second )/log (2.0 );
21502169 if (!seen_first)
21512170 seen_first = true ;
21522171 else
0 commit comments