Skip to content

Commit b6b5846

Browse files
committed
filtering options finalized
1 parent 50f30be commit b6b5846

File tree

7 files changed

+243
-133
lines changed

7 files changed

+243
-133
lines changed

docs/content/changelog.rst

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -24,34 +24,6 @@ Changes in deepTools4.0
2424
- options label, smartLabels, genomeChunkLength are removed.
2525
- ignoreDuplicates is removed, and (if wanted) should be set by the SamFlagExclude setting.
2626

27-
28-
29-
30-
31-
32-
33-
34-
35-
36-
37-
38-
# Good to know
39-
40-
41-
42-
43-
44-
45-
46-
47-
48-
49-
50-
51-
52-
53-
54-
5527
# Testing
5628

5729
## computeMatrix

src/alignmentsieve.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ pub fn r_alignmentsieve(
7575
Some(filterrnastrand.to_string()),
7676
None,
7777
None,
78+
None,
7879
);
7980
let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
8081
let (sieve, filtersieve, totalreads, filteredreads) = pool.install(|| {

src/bamcompare.rs

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ pub fn r_bamcompare(
2828
operation: &str,
2929
pseudocount: f32,
3030
// filtering options
31-
extendreads: u32, // if 0, no extension
31+
extendreads: bool, // if 0, no extension
32+
extendreadslen: u32, // length of extension (0 if PE or if not extending)
3233
centerreads: bool,
3334
blacklist: &str, // path to blacklist filename, or 'None'
3435
minmappingquality: u8, //
@@ -74,8 +75,9 @@ pub fn r_bamcompare(
7475
_ => panic!("Error: Cannot determine filetype of blacklist file.")
7576
}
7677
}
78+
//
7779
// Set alignment filters
78-
let filters = Alignmentfilters::new(
80+
let mut filter1 = Alignmentfilters::new(
7981
blacklistregions,
8082
Some(minmappingquality),
8183
Some(samflaginclude),
@@ -86,18 +88,41 @@ pub fn r_bamcompare(
8688
None, // No offset
8789
None, // No strand filtering.
8890
Some(extendreads),
91+
Some(extendreadslen),
8992
Some(centerreads),
9093
);
94+
let mut filter2 = filter1.clone();
95+
if filter1.extendreads && filter1.extendreadslen == 0 && !ispe1 {
96+
panic!("Error: Extendreads is set, but not to a specific length (and library is single end). Specify the length with the --extendReads parameter.");
97+
}
98+
if filter2.extendreads && filter2.extendreadslen == 0 && !ispe2 {
99+
panic!("Error: Extendreads is set, but not to a specific length (and library is single end). Specify the length with the --extendReads parameter.");
100+
}
101+
if filter1.extendreads && filter1.extendreadslen == 0 && ispe1 {
102+
// We need a pass over the bamfile already to get the mean fragment length.
103+
filter1.set_extendreadslen(bamifile1, nproc, &regions);
104+
if verbose {
105+
println!("fragment length for read extension set as: {} for bamfile 1", filter1.extendreadslen);
106+
}
107+
}
108+
if filter2.extendreads && filter2.extendreadslen == 0 && ispe2 {
109+
// We need a pass over the bamfile already to get the mean fragment length.
110+
filter2.set_extendreadslen(bamifile2, nproc, &regions);
111+
if verbose {
112+
println!("fragment length for read extension set as: {} for bamfile 2", filter2.extendreadslen);
113+
}
114+
}
115+
91116
let pool = ThreadPoolBuilder::new().num_threads(nproc).build().unwrap();
92117

93118
// Set up the bam files in a Vec.
94-
let bamfiles = vec![(bamifile1, ispe1), (bamifile2, ispe2)];
119+
let bamfiles: Vec<(&str, bool, &Alignmentfilters)> = vec![(bamifile1, ispe1, &filter1), (bamifile2, ispe2, &filter2)];
95120

96121
let mut covcalcs: Vec<ParsedBamFile> = pool.install(|| {
97122
bamfiles.par_iter()
98-
.map(|(bamfile, ispe)| {
123+
.map(|(bamfile, ispe, alfilter)| {
99124
let (bg, mapped, unmapped, readlen, fraglen) = regionblocks.par_iter()
100-
.map(|i| bam_pileup(bamfile, &i, &binsize, &ispe, &ignorechr, &filters, false, false, true))
125+
.map(|i| bam_pileup(bamfile, &i, &binsize, &ispe, &ignorechr, alfilter , false, false, true))
101126
.reduce(
102127
|| (vec![], 0, 0, vec![], vec![]),
103128
|(mut _bg, mut _mapped, mut _unmapped, mut _readlen, mut _fraglen), (bg, mapped, unmapped, readlen, fraglen)| {

src/bamcoverage.rs

Lines changed: 26 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@ pub fn r_bamcoverage(
2727
// processing options
2828
mnase: bool,
2929
_offset: Py<PyList>, // list of 2 [offset 5', offset 3'], if no offset is required we have [0, 0]
30-
extendreads: u32, // if 0, no extension
31-
centerreads: bool,
30+
extendreads: bool, // true for extension of reads
31+
extendreadslen: u32, // if extendreads is set, and SE, this length is used for extension.
32+
centerreads: bool, // to center the reads or not.
3233
filterrnastrand: &str, // forward, reverse or 'None'
3334
blacklist: &str, // path to blacklist filename, or 'None'
3435
_ignorechr: Py<PyList>, // list of chromosomes to ignore. Is empty if none.
@@ -69,14 +70,21 @@ pub fn r_bamcoverage(
6970
if norm != "None" && scalefactor != 1.0 {
7071
println!("Warning: You have set a normalization option ({}), but also a scale factor. Only the scale factor will be used", norm);
7172
}
72-
if mnase && offset != (1, -1) {
73+
if mnase && offset != (0, 0) {
7374
println!("Warning: Both MNase and offset are set. The offset will be ignored !");
7475
}
7576
if offset != (0, 0) {
7677
if offset.1 > 0 && offset.1 < offset.0 {
7778
panic!("Right side offset cannot be smaller than the left side offset.");
7879
}
7980
}
81+
if extendreads && extendreadslen == 0 && !ispe {
82+
panic!("Error: Extendreads is set, but not to a specific length (and library is single end). Specify the length with the --extendReads parameter.");
83+
}
84+
if centerreads && !extendreads {
85+
println!("Warning: Centerreads is set, but extendreads is not. Centering will do nothing in this case.");
86+
}
87+
8088
if verbose {
8189
println!("Chromosomes to ignore for normalization: {:?}", ignorechr);
8290
}
@@ -120,7 +128,7 @@ pub fn r_bamcoverage(
120128
}
121129
}
122130
// Set alignment filters
123-
let filters = Alignmentfilters::new(
131+
let mut filters = Alignmentfilters::new(
124132
backlistregions,
125133
Some(minmappingquality),
126134
Some(samflaginclude),
@@ -131,8 +139,18 @@ pub fn r_bamcoverage(
131139
Some(offset),
132140
Some(filterrnastrand.to_string()),
133141
Some(extendreads),
142+
Some(extendreadslen),
134143
Some(centerreads),
135144
);
145+
// If extendreads, extendreadslen & ispe, we need a pass over the bamfile already now to get the mean fraglen.
146+
if filters.extendreads && filters.extendreadslen == 0 && ispe {
147+
// We need a pass over the bamfile already to get the mean fragment length.
148+
filters.set_extendreadslen(bamifile, nproc, &regions);
149+
if verbose {
150+
println!("fragment length for read extension set as: {}", filters.extendreadslen);
151+
}
152+
}
153+
136154
let pool = ThreadPoolBuilder::new().num_threads(nproc).build().unwrap();
137155
let (bg, mapped, _unmapped, readlen, fraglen) = pool.install(|| {
138156
regionblocks.par_iter()
@@ -145,10 +163,12 @@ pub fn r_bamcoverage(
145163
_fraglen.extend(fraglen);
146164
_mapped += mapped;
147165
_unmapped += unmapped;
166+
148167
(_bg, _mapped, _unmapped, _readlen, _fraglen)
149168
}
150169
)
151170
});
171+
152172
let readlen = median(readlen);
153173
let fraglen = median(fraglen);
154174
if verbose {
@@ -166,7 +186,7 @@ pub fn r_bamcoverage(
166186
scalefactor,
167187
&verbose
168188
);
169-
189+
170190
// Create output stream
171191
let lines = bg.into_iter().flat_map(
172192
|bg| {
@@ -187,30 +207,7 @@ pub fn r_bamcoverage(
187207
if verbose {
188208
println!("Writing output to: {}", ofile);
189209
}
190-
191-
write_covfile(lines, ofile, ofiletype, chromsizes);
192-
193-
// // Create output stream
194-
// let lines: Vec<(String, Value)> = bg.into_par_iter().flat_map(
195-
// |bg| {
196-
// let reader = BufReader::new(File::open(bg).unwrap());
197-
// reader.lines().map(
198-
// |l| {
199-
// let l = l.unwrap();
200-
// let fields: Vec<&str> = l.split('\t').collect();
201-
// let chrom: String = fields[0].to_string();
202-
// let start: u32 = fields[1].parse().unwrap();
203-
// let end: u32 = fields[2].parse().unwrap();
204-
// let cov: f32 = fields[3].parse().unwrap();
205-
// (chrom, Value {start: start, end: end, value: cov * sf})
206-
// }
207-
// ).collect::<Vec<_>>()
208-
// }
209-
// ).collect();
210-
// if verbose {
211-
// println!("Writing output to: {}", ofile);
212-
// }
213-
// write_covfile(lines.into_iter(), ofile, ofiletype, chromsizes);
214210

211+
write_covfile(lines, ofile, ofiletype, chromsizes);
215212
Ok(())
216213
}

src/covcalc.rs

Lines changed: 8 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,6 @@ pub fn bam_pileup<'a>(
151151
let mut binsize = &bs;
152152
// constant to check if read is first in pair (relevant later)
153153
const FREAD: u16 = 0x40;
154-
155-
156154

157155
// init variables for mapping statistics and lengths
158156
let mut mapped_reads: u32 = 0;
@@ -201,7 +199,7 @@ pub fn bam_pileup<'a>(
201199
for record in bam.records() {
202200
let mut record = record.expect("Error parsing record.");
203201
if filters.filter {
204-
if filters.filter_record(&record) {
202+
if filters.filter_record(&record, &region.0.as_str()) {
205203
continue;
206204
}
207205
}
@@ -240,7 +238,7 @@ pub fn bam_pileup<'a>(
240238
for record in bam.records() {
241239
let mut record = record.expect("Error parsing record.");
242240
if filters.filter {
243-
if filters.filter_record(&record) {
241+
if filters.filter_record(&record, region.0.as_str()) {
244242
continue;
245243
}
246244
}
@@ -272,15 +270,17 @@ pub fn bam_pileup<'a>(
272270
counts = vec![0.0; (region.2 - region.1).div_ceil(*binsize) as usize];
273271
// let mut binstart = region.1;
274272
let mut binix: u32 = 0;
273+
275274
for record in bam.records() {
276275
let mut record = record.expect("Error parsing record.");
277276

278277
if filters.filter {
279-
if filters.filter_record(&record) {
278+
if filters.filter_record(&record, region.0.as_str()) {
280279
continue;
281280
}
282281
}
283282
if filters.manipulate {
283+
284284
let manipulated_blockpos = filters.manipulate_record(&mut record);
285285
if manipulated_blockpos.is_none() {
286286
continue;
@@ -380,19 +380,10 @@ pub fn bam_pileup<'a>(
380380
}
381381
let bgpath = bg.into_temp_path();
382382
let tmpvec = vec![bgpath];
383+
383384
return (tmpvec, mapped_reads, unmapped_reads, readlens, fraglens);
384385
}
385386

386-
fn pos_in_blacklist(pos: i64, chrom: &str, blacklist: &Vec<Region>) -> bool {
387-
for region in blacklist.iter() {
388-
// Note that get_startu / getendu is used as they are guaranteed to be u32's to start with.
389-
if region.get_startu() <= pos as u32 && pos as u32 <= region.get_endu() && region.chrom == chrom {
390-
return true;
391-
}
392-
}
393-
return false;
394-
}
395-
396387
#[derive(Clone, Debug)]
397388
pub struct Region {
398389
pub chrom: String,
@@ -1330,33 +1321,9 @@ pub fn region_divider(regs: &Vec<Region>) -> Vec<Vec<Region>> {
13301321
tempregionvec = Vec::new();
13311322
bplen = 0
13321323
}
1333-
// // our regions are rather large, so we can split these up (in case both start/end are Revalue:U)
1334-
// match (&reg.start, &reg.end) {
1335-
// (Revalue::U(start), Revalue::U(end)) => {
1336-
// let mut start: u32 = *start;
1337-
// let mut end: u32 = *end;
1338-
// while start < end {
1339-
// let newend = std::cmp::min(start + 10000000, end);
1340-
// let mut entryname = format!("{}:{}-{}", reg.chrom, start, newend);
1341-
// tempregionvec.push( Region {
1342-
// chrom: reg.chrom.clone(),
1343-
// start: Revalue::U(start),
1344-
// end: Revalue::U(newend),
1345-
// score: reg.score.clone(),
1346-
// strand: reg.strand.clone(),
1347-
// name: entryname,
1348-
// regionlength: newend-start
1349-
// } );
1350-
// start = newend;
1351-
// }
1352-
// blocks.push(tempregionvec);
1353-
// tempregionvec = Vec::new();
1354-
// },
1355-
// _ => {
1356-
// blocks.push(vec![reg.clone()]);
1357-
// }
1324+
13581325
blocks.push(vec![reg.clone()]);
1359-
// }
1326+
13601327
} else {
13611328
tempregionvec.push(reg.clone());
13621329
bplen += reg.regionlength;

0 commit comments

Comments
 (0)