Skip to content

Commit 1b42a04

Browse files
committed
weed out last computematrix bugs in negative stranded bed/gtf
1 parent 1411bee commit 1b42a04

File tree

2 files changed

+83
-61
lines changed

2 files changed

+83
-61
lines changed

src/covcalc.rs

Lines changed: 53 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -533,9 +533,9 @@ impl Region {
533533

534534
let mut absstart: i64 = anchorstart as i64 - scale_regions.upstream as i64;
535535
let absstop: i64 = anchorstop as i64 + scale_regions.downstream as i64;
536-
536+
println!("+ - U, absstart = {}, anchorstart = {}", absstart, anchorstart);
537537
for binix in (absstart..anchorstart as i64).step_by(scale_regions.binsize as usize) {
538-
if binix < 0 || binix as u32 >= chromend || (binix + scale_regions.binsize as i64) as u32 >= chromend {
538+
if binix < 0 || binix as u32 > chromend || (binix + scale_regions.binsize as i64) as u32 > chromend {
539539
leftbins.push(Bin::Conbin(0,0));
540540
} else if scale_regions.nan_after_end && binix as u32 <= *start {
541541
leftbins.push(Bin::Conbin(0,0));
@@ -545,7 +545,7 @@ impl Region {
545545
}
546546

547547
for binix in (anchorstop as i64..absstop).step_by(scale_regions.binsize as usize) {
548-
if binix < 0 || binix as u32 >= chromend || (binix + scale_regions.binsize as i64) as u32 >= chromend {
548+
if binix < 0 || binix as u32 > chromend || (binix + scale_regions.binsize as i64) as u32 > chromend {
549549
rightbins.push(Bin::Conbin(0,0));
550550
} else if scale_regions.nan_after_end && binix as u32 >= *end {
551551
rightbins.push(Bin::Conbin(0,0));
@@ -566,6 +566,7 @@ impl Region {
566566
for bin in rightbins.into_iter() {
567567
bins.push(bin);
568568
}
569+
569570
}
570571
(Revalue::V(start), Revalue::V(end)) => {
571572
let exons: Vec<(u32, u32)> = start.iter().zip(end.iter())
@@ -576,7 +577,7 @@ impl Region {
576577
let mut lastanchor: u32 = anchorstop;
577578
let mut walked_bps: u32 = 0;
578579
while walked_bps < scale_regions.downstream {
579-
if lastanchor >= chromend {
580+
if lastanchor > chromend {
580581
rightbins.push(Bin::Conbin(0,0));
581582
walked_bps += scale_regions.binsize;
582583
} else {
@@ -597,6 +598,7 @@ impl Region {
597598
let mut leftbins: Vec<Bin> = Vec::new();
598599
let mut lastanchor: u32 = anchorstart;
599600
let mut walked_bps: u32 = 0;
601+
600602
while walked_bps < scale_regions.upstream {
601603
if lastanchor == 0 {
602604
leftbins.push(Bin::Conbin(0,0));
@@ -642,33 +644,33 @@ impl Region {
642644

643645
let mut absstart: i64 = anchorstop as i64 + scale_regions.upstream as i64;
644646
let absstop: i64 = anchorstart as i64 - scale_regions.downstream as i64;
645-
647+
646648
let steps: Vec<_> = (anchorstop as i64..absstart)
647649
.step_by(scale_regions.binsize as usize)
648650
.collect();
649651
for binix in steps.into_iter().rev() {
650652
if binix as u32 > chromend || (binix + scale_regions.binsize as i64) as u32 > chromend {
651653
rightbins.push(Bin::Conbin(0,0));
652654
} else if scale_regions.nan_after_end && binix as u32 >= *end {
653-
leftbins.push(Bin::Conbin(0,0));
655+
rightbins.push(Bin::Conbin(0,0));
654656
} else {
655-
leftbins.push(Bin::Conbin(binix as u32, (binix as u32) + scale_regions.binsize));
657+
rightbins.push(Bin::Conbin(binix as u32, (binix as u32) + scale_regions.binsize));
656658
}
657659
}
658-
660+
println!("- - U, absstop = {}, anchorstart = {}", absstop, anchorstart);
659661
let steps: Vec<_> = (absstop..anchorstart as i64)
660662
.step_by(scale_regions.binsize as usize)
661663
.collect();
662664
for binix in steps.into_iter().rev() {
663665
if binix < 0 {
664666
leftbins.push(Bin::Conbin(0,0));
665667
} else if scale_regions.nan_after_end && binix as u32 + scale_regions.binsize <= *start {
666-
rightbins.push(Bin::Conbin(0,0));
668+
leftbins.push(Bin::Conbin(0,0));
667669
} else {
668-
rightbins.push(Bin::Conbin(binix as u32, (binix as u32) + scale_regions.binsize));
670+
leftbins.push(Bin::Conbin(binix as u32, (binix as u32) + scale_regions.binsize));
669671
}
670672
}
671-
673+
672674
for bin in rightbins.into_iter() {
673675
bins.push(bin);
674676
}
@@ -692,7 +694,7 @@ impl Region {
692694
let mut lastanchor: u32 = anchorstop;
693695
let mut walked_bps: u32 = 0;
694696
while walked_bps < scale_regions.upstream {
695-
if lastanchor >= chromend {
697+
if lastanchor > chromend {
696698
rightbins.push(Bin::Conbin(0,0));
697699
walked_bps += scale_regions.binsize;
698700
} else {
@@ -731,6 +733,7 @@ impl Region {
731733
lastanchor = retanch;
732734
}
733735
}
736+
734737
// Note that now we need to go the exact opposite way as for the + strand as the 'highest position' is the 'starting point'.
735738
rightbins.reverse();
736739
for bin in rightbins.into_iter() {
@@ -803,21 +806,19 @@ impl Region {
803806
}
804807
let bodystart = *start + scale_regions.unscaled5prime;
805808
let bodyend = *end - scale_regions.unscaled3prime;
806-
809+
807810
// Get the bins over the body length. These need to be scaled, so similar to deeptools < 4, linspace is used.
808811
let neededbins = (scale_regions.regionbodylength / scale_regions.binsize) as usize;
809812
// There's multiple options here:
810813
// transcriptlength >= regionbodylength -> linspace
811814
// regionbodylength / binsize > transcriptlength <= regionbodylength -> 1 >= binsize > binsize.
812815
// transcriptlength <= regionbodylength / binsize -> index repetitions with binsize of one.
813816
let scaledbinsize = std::cmp::min(std::cmp::max((bodyend - bodystart) / neededbins as u32, 1), scale_regions.binsize);
814-
815817
innerbins.extend( Array1::linspace(bodystart as f32, (bodyend - scaledbinsize) as f32, neededbins)
816-
.mapv(|x| x as u32)
818+
.mapv(|x| x.round() as u32)
817819
.map(|x| Bin::Conbin(*x, *x + scaledbinsize))
818820
.into_iter()
819821
.collect::<Vec<_>>() );
820-
821822
// Combine the vectors and return
822823
let mut combined_bins = Vec::new();
823824
if scale_regions.unscaled5prime > 0 {
@@ -872,11 +873,11 @@ impl Region {
872873
}
873874
}
874875
un3bins.reverse();
875-
876+
876877
let bodystart: u32;
877878
let bodyend: u32;
878879
if scale_regions.unscaled5prime > 0 {
879-
bodystart = un5bins.last().unwrap().get_end();
880+
bodystart = un5bins.last().unwrap().get_end() - 1;
880881
} else {
881882
bodystart = *start.first().unwrap();
882883
}
@@ -932,7 +933,7 @@ impl Region {
932933
}
933934

934935
let innerbins = Array1::linspace(0 as f32, ((truebodylength)/scaledbinsize) as f32, neededbins)
935-
.mapv(|x| x as u32)
936+
.mapv(|x| x.round() as u32)
936937
.map(|x| binmap.get(&x).unwrap().clone())
937938
.into_iter()
938939
.collect::<Vec<Bin>>();
@@ -959,45 +960,36 @@ impl Region {
959960
let mut un5bins: Vec<Bin> = Vec::new();
960961
let mut un3bins: Vec<Bin> = Vec::new();
961962
let mut innerbins: Vec<Bin> = Vec::new();
962-
if scale_regions.unscaled5prime > 0 {
963-
un5bins.extend((0..scale_regions.unscaled5prime)
963+
if scale_regions.unscaled3prime > 0 {
964+
un3bins.extend((0..scale_regions.unscaled3prime)
964965
.step_by(scale_regions.binsize as usize)
965-
.map(|i| Bin::Conbin(*end - i - scale_regions.binsize, *end - i))
966+
.map(|i| Bin::Conbin(*start + i, *start + i + scale_regions.binsize))
966967
.collect::<Vec<Bin>>());
967968
}
968969

969-
if scale_regions.unscaled3prime > 0 {
970-
un3bins.extend( (0..scale_regions.unscaled3prime)
970+
if scale_regions.unscaled5prime > 0 {
971+
un5bins.extend( (0..scale_regions.unscaled5prime)
971972
.step_by(scale_regions.binsize as usize)
972973
.rev()
973-
.map(|i| Bin::Conbin(*start + scale_regions.unscaled3prime - i - scale_regions.binsize, *start + scale_regions.unscaled3prime - i))
974+
.map(|i| Bin::Conbin(*end - i - scale_regions.binsize, *end - i))
974975
.collect::<Vec<Bin>>() );
975976
}
976977
let bodystart = *start + scale_regions.unscaled3prime;
977978
let bodyend = *end - scale_regions.unscaled5prime;
978-
979+
979980
// Get the bins over the body length. These need to be scaled, so similar to deeptools < 4, linspace is used.
980981
let neededbins = (scale_regions.regionbodylength / scale_regions.binsize) as usize;
981982
// There's multiple options here:
982983
// transcriptlength >= regionbodylength -> linspace
983984
// regionbodylength / binsize > transcriptlength <= regionbodylength -> 1 >= binsize > binsize.
984985
// transcriptlength <= regionbodylength / binsize -> index repetitions with binsize of one.
985-
let mut scaledbinsize = (bodyend - bodystart)/neededbins as u32;
986-
if scaledbinsize == 0 {
987-
scaledbinsize = 1;
988-
}
989-
if scaledbinsize > scale_regions.binsize {
990-
scaledbinsize = scale_regions.binsize;
991-
}
992-
986+
let scaledbinsize = std::cmp::min(std::cmp::max((bodyend - bodystart) / neededbins as u32, 1), scale_regions.binsize);
993987
innerbins.extend( Array1::linspace(bodystart as f32, (bodyend - scaledbinsize) as f32, neededbins)
994-
.mapv(|x| x as u32)
988+
.mapv(|x| x.round() as u32)
995989
.map(|x| Bin::Conbin(*x, *x + scaledbinsize))
996990
.into_iter()
997991
.collect::<Vec<_>>() );
998-
999-
// Reverse innerbins to go from 3' -> 5'
1000-
innerbins.reverse();
992+
println!("");
1001993
// Combine the vectors and return
1002994
let mut combined_bins = Vec::new();
1003995
if scale_regions.unscaled3prime > 0 {
@@ -1034,7 +1026,6 @@ impl Region {
10341026
}
10351027
}
10361028
un5bins.reverse();
1037-
10381029
if scale_regions.unscaled3prime > 0 {
10391030
let mut walked_bps: u32 = 0;
10401031
let mut lastanchor: u32 = start[0];
@@ -1053,18 +1044,18 @@ impl Region {
10531044
lastanchor = retanch;
10541045
}
10551046
}
1056-
1047+
10571048
let bodystart: u32;
10581049
let bodyend: u32;
1059-
if scale_regions.unscaled3prime > 0 {
1060-
bodystart = un3bins.last().unwrap().get_end();
1061-
} else {
1062-
bodystart = *start.first().unwrap();
1063-
}
10641050
if scale_regions.unscaled5prime > 0 {
10651051
bodyend = un5bins.first().unwrap().get_start();
10661052
} else {
1067-
bodyend = *end.last().unwrap();
1053+
bodyend = *start.first().unwrap();
1054+
}
1055+
if scale_regions.unscaled3prime > 0 {
1056+
bodystart = un3bins.last().unwrap().get_end() - 1;
1057+
} else {
1058+
bodystart = *end.last().unwrap();
10681059
}
10691060
let truebodylength = self.regionlength - scale_regions.unscaled5prime - scale_regions.unscaled3prime;
10701061
let neededbins = (scale_regions.regionbodylength / scale_regions.binsize) as usize;
@@ -1113,19 +1104,20 @@ impl Region {
11131104
}
11141105

11151106
let innerbins = Array1::linspace(0 as f32, ((truebodylength)/scaledbinsize) as f32, neededbins)
1116-
.mapv(|x| x as u32)
1107+
.mapv(|x| x.round() as u32)
11171108
.map(|x| binmap.get(&x).unwrap().clone())
11181109
.into_iter()
11191110
.collect::<Vec<Bin>>();
1111+
11201112
// Combine the vectors and return
11211113
let mut combined_bins = Vec::new();
1122-
if scale_regions.unscaled5prime > 0 {
1123-
combined_bins.extend(un5bins.into_iter());
1124-
}
1125-
combined_bins.extend(innerbins.into_iter());
11261114
if scale_regions.unscaled3prime > 0 {
11271115
combined_bins.extend(un3bins.into_iter());
11281116
}
1117+
combined_bins.extend(innerbins.into_iter());
1118+
if scale_regions.unscaled5prime > 0 {
1119+
combined_bins.extend(un5bins.into_iter());
1120+
}
11291121
return combined_bins;
11301122
},
11311123
_ => panic!("Start and End are not either both u32, or Vecs. This means your regions file is ill-defined."),
@@ -1150,6 +1142,9 @@ fn refpoint_exonwalker(exons: &Vec<(u32, u32)>, anchor: u32, binsize: u32, chrom
11501142
match anchorix {
11511143
Some(i) => {
11521144
// anchor sits in an exon. Check if anchor + binsize is also in same exon.
1145+
if anchor + binsize > chromend {
1146+
return (Bin::Conbin(0,0), chromend);
1147+
}
11531148
if anchor + binsize <= exons[i].1 {
11541149
(Bin::Conbin(anchor, anchor + binsize), anchor + binsize)
11551150
} else {
@@ -1217,7 +1212,7 @@ fn refpoint_exonwalker(exons: &Vec<(u32, u32)>, anchor: u32, binsize: u32, chrom
12171212
None => {
12181213
// our anchor doesn't sit in exons. We just return the anchor + binsize as Bin
12191214
if anchor + binsize > chromend {
1220-
(Bin::Conbin(anchor, chromend), chromend)
1215+
(Bin::Conbin(0, 0), chromend)
12211216
} else {
12221217
(Bin::Conbin(anchor, anchor + binsize), anchor + binsize)
12231218
}
@@ -1227,6 +1222,11 @@ fn refpoint_exonwalker(exons: &Vec<(u32, u32)>, anchor: u32, binsize: u32, chrom
12271222
// Walk backwards (upstream, towards chromosome start)
12281223
match anchorix {
12291224
Some(i) => {
1225+
// Run a check to see if binsize > anchor.
1226+
if anchor < binsize {
1227+
// We are at the start of the chromosome. We need to return a Conbin.
1228+
return (Bin::Conbin(0, 0), 0);
1229+
}
12301230
// anchor sits in an exon. Check if anchor - binsize is also in same exon.
12311231
if anchor - binsize >= exons[i].0 {
12321232
(Bin::Conbin(anchor - binsize, anchor), anchor - binsize)
@@ -1296,7 +1296,7 @@ fn refpoint_exonwalker(exons: &Vec<(u32, u32)>, anchor: u32, binsize: u32, chrom
12961296
None => {
12971297
// our anchor doesn't sit in exons. We just return the anchor - binsize as Bin
12981298
if anchor < binsize {
1299-
(Bin::Conbin(0, anchor), 0)
1299+
(Bin::Conbin(0, 0), 0)
13001300
} else {
13011301
(Bin::Conbin(anchor - binsize, anchor), anchor - binsize)
13021302
}

0 commit comments

Comments
 (0)