Skip to content

Commit afd5cdd

Browse files
authored
Merge pull request #85 from dieterich-lab/devel
Go for it
2 parents 6e4fe3c + 07192e7 commit afd5cdd

File tree

12 files changed

+133
-108
lines changed

12 files changed

+133
-108
lines changed

DCC/Circ_nonCirc_Exon_Match.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import HTSeq
77

8-
from IntervalTree import IntervalTree
8+
from .IntervalTree import IntervalTree
99

1010

1111
class CircNonCircExon(object):
@@ -194,7 +194,7 @@ def printuniq(self, Infile):
194194
for lin in f:
195195
lin_split = lin.split('\t')
196196
if keys.count(lin_split[0] + '\t' + lin_split[1] + '\t' + lin_split[2]) == 1:
197-
print lin.strip('\n')
197+
print(lin.strip('\n'))
198198

199199
def readgtf(self, gtf_file):
200200
# store nonCircExons based on transcript_id and exon_number with all its annotations from different transcripts
@@ -275,7 +275,7 @@ def readHTSeqCount(self, HTSeqCount, exon_id2custom_exon_id):
275275
def findcircAdjacent(self, circExons, Custom_exon_id2Iv, Iv2Custom_exon_id, start=True):
276276
circAdjacentExons = {}
277277
circAdjacentExonsIv = {}
278-
for key in circExons.keys():
278+
for key in list(circExons.keys()):
279279
for ids in circExons[key]:
280280
try:
281281
interval = Custom_exon_id2Iv[self.getAdjacent(ids, start=start)]
@@ -292,7 +292,7 @@ def printCounts(self, Exons, Count_custom_exon_id, Custom_exon_id2Length):
292292
# Print the counts of circexons and adjacentexons
293293
# Exons: dictionaries with intervals as key, custom_exon_id as values
294294
ExonCounts = {}
295-
for key in Exons.keys():
295+
for key in list(Exons.keys()):
296296
counts = []
297297
for ids in Exons[key]: # If for circAdjacentExons, ids here is a list
298298
try:
@@ -397,7 +397,7 @@ def readSJ_out_tab(self, SJ_out_tab):
397397
strand] = lin_split[6]
398398
sj.close()
399399
except IOError:
400-
print 'Do you have SJ.out.tab files in your sample folder? DCC cannot find it.'
400+
print('Do you have SJ.out.tab files in your sample folder? DCC cannot find it.')
401401
return junctionReadCount
402402

403403
def getskipjunctionCount(self, exonskipjunctions, junctionReadCount):

DCC/CombineCounts.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ def comb_coor(self, circfiles, strand=True):
3535
onefile.close()
3636

3737
if strand:
38-
coors = ['\t'.join(key.split('\t')[:-1]) + value for key, value in coorsDict.iteritems()]
38+
coors = ['\t'.join(key.split('\t')[:-1]) + value for key, value in coorsDict.items()]
3939
else:
40-
coors = ['{}{}'.format(key, value) for key, value in coorsDict.iteritems()]
40+
coors = ['{}{}'.format(key, value) for key, value in coorsDict.items()]
4141

4242
coorsSorted = self.sortBed(coors, retList=True)
4343
for itm in coorsSorted:

DCC/IntervalTree.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def intersect(self, interval, report_func):
3636
# use the intersect method of IntervalNode class, need to make this function aware of strand
3737

3838
def traverse(self, func):
39-
for item in self.chroms.itervalues():
39+
for item in self.chroms.values():
4040
item.traverse(func)
4141

4242

DCC/__init__.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# Import modules
2-
from findcircRNA import Findcirc
3-
from circFilter import Circfilter
4-
from circAnnotate import CircAnnotate
5-
from genecount import Genecount
6-
from CombineCounts import Combine
7-
from Circ_nonCirc_Exon_Match import CircNonCircExon
8-
from IntervalTree import IntervalTree
9-
from main import main
2+
from .findcircRNA import Findcirc
3+
from .circFilter import Circfilter
4+
from .circAnnotate import CircAnnotate
5+
from .genecount import Genecount
6+
from .CombineCounts import Combine
7+
from .Circ_nonCirc_Exon_Match import CircNonCircExon
8+
from .IntervalTree import IntervalTree
9+
from .main import main

DCC/circAnnotate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
import HTSeq
1111

12-
from IntervalTree import IntervalTree
12+
from .IntervalTree import IntervalTree
1313

1414

1515
class CircAnnotate(object):

DCC/circFilter.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import HTSeq
66

7-
from IntervalTree import IntervalTree
7+
from .IntervalTree import IntervalTree
88

99

1010
##########################
@@ -63,7 +63,7 @@ def readcirc(self, countfile, coordinates):
6363

6464
# Do filtering
6565
def filtercount(self, count, indx):
66-
print 'Filtering by read counts'
66+
print('Filtering by read counts')
6767
sel = [] # store the passed filtering rows
6868
for itm in range(len(count)):
6969
if indx[itm][4] == '0':
@@ -117,7 +117,7 @@ def dummy_filter(self, indx0, count0):
117117
np.savetxt(self.tmp_dir + 'tmp_unsortedWithChrM', nonrep, delimiter='\t', newline='\n', fmt='%s')
118118

119119
def removeChrM(self, withChrM):
120-
print 'Remove ChrM'
120+
print('Remove ChrM')
121121
unremoved = open(withChrM, 'r').readlines()
122122
removed = []
123123
for lines in unremoved:

DCC/findcircRNA.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def sepDuplicates(self, Chim_junc, duplicates, nonduplicates):
7878
if reads.count(read) == 2:
7979
dup.write(lines[indx])
8080
elif reads.count(read) > 2:
81-
print 'Read %s has more than 2 count.' % read
81+
print('Read %s has more than 2 count.' % read)
8282
try:
8383
logging.warning('Read %s has more than 2 count.' % read)
8484
except NameError:
@@ -159,8 +159,8 @@ def findcirc(self, Chim_junc, output, strand=True):
159159
linecnt = linecnt + 1
160160

161161
if len(L) < 14:
162-
print ("WARNING: File " + str(Chim_junc) + ", line " + str(linecnt) + " does not contain all features.")
163-
print ("WARNING: " + str(Chim_junc) + " is probably corrupt.")
162+
print(("WARNING: File " + str(Chim_junc) + ", line " + str(linecnt) + " does not contain all features."))
163+
print(("WARNING: " + str(Chim_junc) + " is probably corrupt."))
164164
if L[0] == "chr_donorA":
165165
continue
166166
if int(L[6]) >= 0 and L[0] == L[3] and L[2] == L[5] and (
@@ -217,7 +217,7 @@ def count(self, sortedlist, strand=True):
217217
elif not strand:
218218
circs = (itm[0], itm[1], itm[2])
219219
else:
220-
print "Please specify correct strand information."
220+
print("Please specify correct strand information.")
221221
cnt[circs] += 1
222222
itm.append(str(cnt[circs]))
223223
# tmp_count.append( [itm[0],itm[1],itm[2],itm[3],itm[7],itm[4],itm[5],itm[6]] )

DCC/fix2chimera.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,10 @@ def modify_junctiontype(junctiontype):
5555
continue
5656
# check if the row has all fields
5757
if len(line_split) < 14:
58-
print ("WARNING: File " + str(chimeric_junction_mate2) + ", line " + str(linecnt)
59-
+ " does not contain all features.")
60-
print ("WARNING: " + str(chimeric_junction_mate2) + " is probably corrupt.")
61-
print ("WARNING: Offending line: " + str(line))
58+
print(("WARNING: File " + str(chimeric_junction_mate2) + ", line " + str(linecnt)
59+
+ " does not contain all features."))
60+
print(("WARNING: " + str(chimeric_junction_mate2) + " is probably corrupt."))
61+
print(("WARNING: Offending line: " + str(line)))
6262

6363
linecnt += 1
6464

@@ -110,7 +110,7 @@ def printduplicates(self, merged, duplicates, field=10):
110110
if not os.path.isfile(merged):
111111
sys.exit("ERROR: File " + str(merged) + " is missing!")
112112
elif os.stat(merged).st_size == 0:
113-
print ("WARNING: File " + str(merged) + " is empty!")
113+
print(("WARNING: File " + str(merged) + " is empty!"))
114114
else:
115115
try:
116116
inputfile = open(merged, 'r')

DCC/genecount.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -99,33 +99,33 @@ def genecount(self, circ_coordinates, bamfile, ref, tid):
9999
start_coordinates.close()
100100
end_coordinates.close()
101101

102-
print ('Started linear gene expression counting for %s' % bamfile)
102+
print(('Started linear gene expression counting for %s' % bamfile))
103103

104104
start = time.time()
105105
# mpileup get the read counts of the start and end positions
106-
print ("\t=> running mpileup for start positions [%s]" % bamfile)
106+
print(("\t=> running mpileup for start positions [%s]" % bamfile))
107107
mpileup_start = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_start_coordinates_' + tid)
108108
end = time.time() - start
109-
print ("\t=> mpileup for start positions for %s took %d seconds" % (bamfile, end))
109+
print(("\t=> mpileup for start positions for %s took %d seconds" % (bamfile, end)))
110110

111111
start = time.time()
112112
# mpileup get the read counts of the start and end positions
113-
print ("\t=> running mpileup for end positions [%s]" % bamfile)
113+
print(("\t=> running mpileup for end positions [%s]" % bamfile))
114114
mpileup_end = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_end_coordinates_' + tid)
115115
end = time.time() - start
116-
print ("\t=> mpileup for end positions for %s took %d seconds" % (bamfile, end))
116+
print(("\t=> mpileup for end positions for %s took %d seconds" % (bamfile, end)))
117117

118-
print "\t=> gathering read counts for start positions [%s]" % bamfile
118+
print("\t=> gathering read counts for start positions [%s]" % bamfile)
119119
startcount = self.getreadscount(mpileup_start, countmapped=True)
120120

121-
print "\t=> gathering read counts for end positions [%s]" % bamfile
121+
print("\t=> gathering read counts for end positions [%s]" % bamfile)
122122
endcount = self.getreadscount(mpileup_end, countmapped=True)
123123

124124
# remove tmp files
125125
# os.remove(self.tmp_dir + 'tmp_start_coordinates_' + tid)
126126
# os.remove(self.tmp_dir + 'tmp_end_coordinates_' + tid)
127127

128-
print 'Finished linear gene expression counting for %s' % bamfile
128+
print('Finished linear gene expression counting for %s' % bamfile)
129129

130130
return startcount, endcount
131131

@@ -194,29 +194,29 @@ def linearsplicedreadscount(self, circ_coor, bamfile, ref, header=True):
194194
start_coor_1.close()
195195
end_coor.close()
196196
end_coor_1.close()
197-
print ('Started linear spliced read counting for %s' % bamfile)
197+
print(('Started linear spliced read counting for %s' % bamfile))
198198

199199
# mpileup get the number of spliced reads at circle start position and (start-1) position.
200200

201-
print ("\t=> running mpileup 1 for start positions [%s]" % bamfile)
201+
print(("\t=> running mpileup 1 for start positions [%s]" % bamfile))
202202
mpileup_start = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_start_coor_1')
203203

204-
print ("\t=> running mpileup 2 for start positions [%s]" % bamfile)
204+
print(("\t=> running mpileup 2 for start positions [%s]" % bamfile))
205205
mpileup_start_1 = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_start_coor_2')
206206

207207
# mpileup get the number of spliced reads at circle end position and (end+1) position.
208-
print ("\t=> running mpileup 1 for end positions [%s]" % bamfile)
208+
print(("\t=> running mpileup 1 for end positions [%s]" % bamfile))
209209
mpileup_end = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_end_coor_1')
210210

211-
print ("\t=> running mpileup 2 for end positions [%s]" % bamfile)
211+
print(("\t=> running mpileup 2 for end positions [%s]" % bamfile))
212212
mpileup_end_1 = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_end_coor_2')
213213

214214
# get count
215215

216-
print "\t=> gathering read counts for start positions [%s]" % bamfile
216+
print("\t=> gathering read counts for start positions [%s]" % bamfile)
217217
startcount = self.submpileup(self.getreadscount(mpileup_start_1), self.getreadscount(mpileup_start))
218218

219-
print "\t=> gathering read counts for end positions [%s]" % bamfile
219+
print("\t=> gathering read counts for end positions [%s]" % bamfile)
220220
endcount = self.submpileup(self.getreadscount(mpileup_end), self.getreadscount(mpileup_end_1), left=False)
221221

222222
# remove tmp files
@@ -225,7 +225,7 @@ def linearsplicedreadscount(self, circ_coor, bamfile, ref, header=True):
225225
# os.remove(self.tmp_dir + 'tmp_end_coor')
226226
# os.remove(self.tmp_dir + 'tmp_end_coor_1')
227227

228-
print 'Finished linear spliced read counting for %s' % bamfile
228+
print('Finished linear spliced read counting for %s' % bamfile)
229229

230230
return startcount, endcount
231231

@@ -266,7 +266,7 @@ def comb_gen_count(self, circ_coor, bamfile, ref, output, countlinearsplicedread
266266
# call genecount to get the start and end positon read counts
267267
tmp_start, tmp_end = self.genecount(circ_coor, bamfile, ref, tid)
268268

269-
print 'Ended linear gene expression counting %s' % bamfile
269+
print('Ended linear gene expression counting %s' % bamfile)
270270
logging.info('Ended linear gene expression counting %s' % bamfile)
271271

272272
for line in tmp_start:
@@ -314,6 +314,6 @@ def comb_gen_count(self, circ_coor, bamfile, ref, output, countlinearsplicedread
314314
# tmp_end.close()
315315
count_table.close()
316316

317-
print 'Ended post processing %s' % bamfile
317+
print('Ended post processing %s' % bamfile)
318318
logging.info('Ended post processing %s' % bamfile)
319319
return tid

0 commit comments

Comments
 (0)