Skip to content

Commit a1303e4

Browse files
committed
proper offset implementation, non offsets encoded as (0,0)
1 parent a6af11e commit a1303e4

File tree

4 files changed

+67
-26
lines changed

4 files changed

+67
-26
lines changed

pydeeptools/deeptools/bamCoverage2.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,11 @@ def main(args=None):
141141
args.effectiveGenomeSize = 0
142142
if not args.normalizeUsing:
143143
args.normalizeUsing = 'None'
144+
print(args.Offset)
144145
if not args.Offset:
145-
args.Offset = [1, -1]
146+
args.Offset = [0, 0]
147+
elif len(args.Offset) == 1:
148+
args.Offset = [args.Offset[0], 0]
146149
if not args.extendReads:
147150
args.extendReads = 0
148151
if not args.filterRNAstrand:

src/bamcoverage.rs

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use pyo3::prelude::*;
22
use pyo3::types::PyList;
33
use rayon::prelude::*;
44
use rayon::ThreadPoolBuilder;
5+
use core::panic;
56
use std::io::prelude::*;
67
use std::io::BufReader;
78
use std::fs::File;
@@ -25,7 +26,7 @@ pub fn r_bamcoverage(
2526
scalefactor: f32, // default 1.0
2627
// processing options
2728
mnase: bool,
28-
_offset: Py<PyList>, // list of max 2 [offset 5', offset 3'], if no offset is required we have [1, -1]
29+
_offset: Py<PyList>, // list of 2 [offset 5', offset 3'], if no offset is required we have [0, 0]
2930
extendreads: u32, // if 0, no extension
3031
centerreads: bool,
3132
filterrnastrand: &str, // forward, reverse or 'None'
@@ -45,14 +46,14 @@ pub fn r_bamcoverage(
4546
verbose: bool,
4647
collapse: bool,
4748
) -> PyResult<()> {
48-
let mut offset: (i32, i32) = (1, -1);
49+
let mut offset: (i32, i32) = (0, 0);
4950
let mut ignorechr: Vec<String> = Vec::new();
5051
Python::with_gil(|py| {
5152
let offsetv: Vec<i32> = _offset.extract(py).expect("Failed to retrieve offset.");
52-
if offsetv.len() == 1 {
53-
offset = (offsetv[0], -1);
54-
} else if offsetv.len() == 2 {
53+
if offsetv.len() == 2 {
5554
offset = (offsetv[0], offsetv[1]);
55+
} else {
56+
panic!("Error: Offset should be a list of 2. Received: {:?}", offsetv);
5657
}
5758
ignorechr = _ignorechr.extract(py).expect("Failed to retrieve ignorechr.");
5859
});
@@ -71,10 +72,7 @@ pub fn r_bamcoverage(
7172
if mnase && offset != (1, -1) {
7273
println!("Warning: Both MNase and offset are set. The offset will be ignored !");
7374
}
74-
if offset != (1, -1) {
75-
if offset.0 == 0 {
76-
panic!("Offsets cannot be 0 as they are 1-based positions inside each alignment.");
77-
}
75+
if offset != (0, 0) {
7876
if offset.1 > 0 && offset.1 < offset.0 {
7977
panic!("Right side offset cannot be smaller than the left side offset.");
8078
}

src/covcalc.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -281,15 +281,16 @@ pub fn bam_pileup<'a>(
281281
}
282282
}
283283
if filters.manipulate {
284-
let manipulated_blocks = filters.manipulate_record(&mut record);
285-
if manipulated_blocks.is_none() {
284+
let manipulated_blockpos = filters.manipulate_record(&mut record);
285+
if manipulated_blockpos.is_none() {
286286
continue;
287287
}
288-
let block = manipulated_blocks.unwrap();
289-
let ixstart = ((block[0] - region.1) / binsize) as usize;
290-
let ixend = ((block[1] - region.1) / binsize) as usize;
288+
let indices: HashSet<usize> = manipulated_blockpos
289+
.unwrap()
290+
.into_iter()
291+
.map(|x| ((x - region.1) / binsize) as usize)
292+
.collect();
291293

292-
let indices: HashSet<usize> = (ixstart..ixend).collect();
293294
indices.into_iter()
294295
.for_each(|ix| {
295296
if ix < counts.len() {

src/filtering.rs

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,13 @@ impl Alignmentfilters {
4040
let _mifl = minfraglen.unwrap_or(0);
4141
let _mafl = maxfraglen.unwrap_or(0);
4242
let _mnase = mnase.unwrap_or(false);
43-
let _offset = offset.unwrap_or((1, -1));
43+
let _offset = offset.unwrap_or((0, 0));
4444
let _frs = filterrnastrand.unwrap_or(String::from("None"));
4545
let _extend = extendreads.unwrap_or(0);
4646
let _center = centerreads.unwrap_or(false);
4747

4848
// Set the manipulate bool for a quick escape in case manipulation is not needed.
49-
if _offset != (1, -1) || _mnase || _extend > 0 || _center {
49+
if _offset != (0, 0) || _mnase || _extend > 0 || _center {
5050
manipulate = true;
5151
}
5252

@@ -144,7 +144,7 @@ impl Alignmentfilters {
144144
}
145145
}
146146

147-
pub fn manipulate_record(&self, rec: &Record) -> Option<[u32; 2]> {
147+
pub fn manipulate_record(&self, rec: &Record) -> Option<Vec<u32>> {
148148
// Not just simple yes - no filtering, but actually manipulating the record.
149149
// In general, this is the case for MNase mode, offset, extendreads and centerreads.
150150
if self.mnase {
@@ -154,26 +154,65 @@ impl Alignmentfilters {
154154
let rinsertsize = rec.insert_size().abs() as u32;
155155
if rec.is_proper_pair() && !rec.is_reverse() && rinsertsize > 1 {
156156
// Not sure why the insert size test is here, but note we always filter prior to manipulating.
157-
// insertsize even
158157
let recpos: u32 = rec.pos() as u32;
159158
let frag_start = recpos - 1 + rinsertsize / 2;
160159

161160
if rinsertsize % 2 == 0 {
162-
//
163161
return Some(
164-
[frag_start, frag_start + 2]
162+
(frag_start..frag_start + 2).collect()
165163
);
166164
} else {
167-
// Uneven middle, frag start + 4 to keep output consistent with deeptools 3.
168165
return Some(
169-
[frag_start, frag_start + 4]
166+
(frag_start..frag_start+4).collect()
170167
);
171168
}
172169
}
173170
return None;
174171
}
175-
if self.offset != (1, -1) {
176-
println!("Offset implementation");
172+
if self.offset != (0, 0) {
173+
// Collect blocks and flatten them out.
174+
let mut blockvec: Vec<u32> = rec
175+
.aligned_blocks()
176+
.flat_map(|x| x[0] as u32..x[1] as u32)
177+
.collect();
178+
let blocklen = blockvec.len() as i32;
179+
180+
// Convert potential negative indices to positive indices
181+
// It could be that for the offset only one value is given, in which case we only use that site
182+
if self.offset.1 == 0 {
183+
//
184+
let pos = if self.offset.0 < 0 {blocklen + self.offset.0 } else {self.offset.0 - 1};
185+
if pos < 0 || pos >= blocklen {
186+
return None;
187+
}
188+
189+
if rec.is_reverse() {
190+
blockvec.reverse();
191+
blockvec = blockvec[pos as usize..pos as usize + 1].to_vec();
192+
blockvec.reverse();
193+
return Some(blockvec);
194+
} else {
195+
blockvec = blockvec[pos as usize..pos as usize + 1].to_vec();
196+
return Some(blockvec);
197+
}
198+
} else {
199+
let start = if self.offset.0 < 0 { blocklen + self.offset.0 } else { self.offset.0 -1};
200+
let end = if self.offset.1 < 0 { blocklen + self.offset.1 + 1 } else { self.offset.1 };
201+
202+
// if the range falls outside the vec, return none (retain deeptools 3 behavior)
203+
if start < 0 || end < 0 || start >= blocklen || end >= blocklen || start >= end {
204+
return None;
205+
}
206+
if rec.is_reverse() {
207+
blockvec.reverse();
208+
blockvec = blockvec[start as usize..end as usize].to_vec();
209+
blockvec.reverse();
210+
return Some(blockvec);
211+
} else {
212+
blockvec = blockvec[start as usize..end as usize].to_vec();
213+
return Some(blockvec);
214+
}
215+
}
177216
}
178217
if self.extendreads > 0 {
179218
println!("extendreads implementation");

0 commit comments

Comments
 (0)