2323#include < TFile.h>
2424#include < TROOT.h>
2525
26- void samtoramntuple (const char *datafile,
27- const char *treefile,
28- bool index, bool split, bool cache,
29- int compression_algorithm,
30- uint32_t quality_policy)
26+ void samtoramntuple (const char *datafile, const char *treefile, bool index, bool split, bool cache,
27+ int compression_algorithm, uint32_t quality_policy)
3128{
3229 (void )split;
3330 (void )cache;
@@ -40,111 +37,111 @@ void samtoramntuple(const char *datafile,
4037 return ;
4138 }
4239
43- RAMNTupleRecord::InitializeRefs ();
44-
45- auto model = RAMNTupleRecord::MakeModel ();
46-
47- ROOT::RNTupleWriteOptions writeOptions;
48- writeOptions.SetCompression (compression_algorithm);
49- writeOptions.SetMaxUnzippedPageSize (64000 );
50-
51- auto writer = ROOT::RNTupleWriter::Append (std::move (model), " RAM" , *rootFile, writeOptions);
52- auto defaultEntry = writer->GetModel ().CreateEntry ();
53- auto recordPtr = defaultEntry->GetPtr <RAMNTupleRecord>(" record" );
54-
55- TList headers;
56- headers.SetName (" headers" );
57-
58- ramcore::SamParser parser;
59-
60- int64_t mapped_count = 0 ;
61- int32_t last_refid = -2 ;
62- int32_t last_indexed_pos = -1000000 ;
63- const int32_t POS_INTERVAL = 10000 ;
64- const int64_t MAPPED_INTERVAL = 100 ;
65-
66- auto header_callback = [&headers](const std::string& tag, const std::string& content) {
67- headers.Add (new TNamed (tag.c_str (), content.c_str ()));
68-
69- if (tag == " @SQ" ) {
70- size_t sn_pos = content.find (" SN:" );
71- if (sn_pos != std::string::npos) {
72- sn_pos += 3 ;
73- size_t tab_pos = content.find (' \t ' , sn_pos);
74- std::string ref_name = content. substr (sn_pos,
75- tab_pos != std::string::npos ? tab_pos - sn_pos : std::string::npos);
76- RAMNTupleRecord::GetRnameRefs ()->GetRefId (ref_name.c_str ());
77- }
78- }
79- };
80-
81- auto record_callback = [&](const ramcore::SamRecord &sam_record, size_t record_num) {
82- recordPtr->SetBit (quality_policy);
83-
84- recordPtr->SetQNAME (sam_record.qname .c_str ());
85- recordPtr->SetFLAG (sam_record.flag );
86- recordPtr->SetREFID (sam_record.rname .c_str ());
87- recordPtr->SetPOS (sam_record.pos );
88- recordPtr->SetMAPQ (sam_record.mapq );
89- recordPtr->SetCIGAR (sam_record.cigar .c_str ());
90- recordPtr->SetREFNEXT (sam_record.rnext .c_str ());
91- recordPtr->SetPNEXT (sam_record.pnext );
92- recordPtr->SetTLEN (sam_record.tlen );
93- recordPtr->SetSEQ (sam_record.seq .c_str ());
94- recordPtr->SetQUAL (sam_record.qual .c_str ());
95-
96- recordPtr->ResetNOPT ();
97- for (const auto &opt : sam_record.optional_fields ) {
98- recordPtr->SetOPT (opt.c_str ());
99- }
100-
101- writer->Fill (*defaultEntry);
102-
103- if (index && !(sam_record.flag & 0x4 ) && recordPtr->GetREFID () >= 0 ) {
104- mapped_count++;
105- bool should_index = false ;
106-
107- int current_refid = recordPtr->GetREFID ();
108- int current_pos = recordPtr->GetPOS () - 1 ;
109-
110- if (current_refid != last_refid) {
111- should_index = true ;
112- last_refid = current_refid;
113- last_indexed_pos = current_pos;
114- }
115-
116- else if (current_pos - last_indexed_pos >= POS_INTERVAL) {
117- should_index = true ;
118- last_indexed_pos = current_pos;
119- }
120-
121- else if (mapped_count % MAPPED_INTERVAL == 0 ) {
122- should_index = true ;
123- }
124-
125- if (should_index) {
126- RAMNTupleRecord::GetIndex ()->AddItem (current_refid, current_pos, record_num);
127- }
128- }
129- };
130-
131- if (!parser.ParseFile (datafile, header_callback, record_callback)) {
132- printf (" Failed to parse SAM file %s\n " , datafile);
133- return ;
134- }
135-
136- writer.reset ();
137-
138- if (index) {
139- RAMNTupleRecord::WriteIndex (*rootFile);
140- }
141- RAMNTupleRecord::WriteAllRefs (*rootFile);
142-
143- headers.Write ();
144- rootFile->Close ();
145-
146- (void )mapped_count;
147- (void )stopwatch;
40+ RAMNTupleRecord::InitializeRefs ();
41+
42+ auto model = RAMNTupleRecord::MakeModel ();
43+
44+ ROOT::RNTupleWriteOptions writeOptions;
45+ writeOptions.SetCompression (compression_algorithm);
46+ writeOptions.SetMaxUnzippedPageSize (64000 );
47+
48+ auto writer = ROOT::RNTupleWriter::Append (std::move (model), " RAM" , *rootFile, writeOptions);
49+ auto defaultEntry = writer->GetModel ().CreateEntry ();
50+ auto recordPtr = defaultEntry->GetPtr <RAMNTupleRecord>(" record" );
51+
52+ TList headers;
53+ headers.SetName (" headers" );
54+
55+ ramcore::SamParser parser;
56+
57+ int64_t mapped_count = 0 ;
58+ int32_t last_refid = -2 ;
59+ int32_t last_indexed_pos = -1000000 ;
60+ const int32_t POS_INTERVAL = 10000 ;
61+ const int64_t MAPPED_INTERVAL = 100 ;
62+
63+ auto header_callback = [&headers](const std::string & tag, const std::string & content) {
64+ headers.Add (new TNamed (tag.c_str (), content.c_str ()));
65+
66+ if (tag == " @SQ" ) {
67+ size_t sn_pos = content.find (" SN:" );
68+ if (sn_pos != std::string::npos) {
69+ sn_pos += 3 ;
70+ size_t tab_pos = content.find (' \t ' , sn_pos);
71+ std::string ref_name =
72+ content. substr (sn_pos, tab_pos != std::string::npos ? tab_pos - sn_pos : std::string::npos);
73+ RAMNTupleRecord::GetRnameRefs ()->GetRefId (ref_name.c_str ());
74+ }
75+ }
76+ };
77+
78+ auto record_callback = [&](const ramcore::SamRecord &sam_record, size_t record_num) {
79+ recordPtr->SetBit (quality_policy);
80+
81+ recordPtr->SetQNAME (sam_record.qname .c_str ());
82+ recordPtr->SetFLAG (sam_record.flag );
83+ recordPtr->SetREFID (sam_record.rname .c_str ());
84+ recordPtr->SetPOS (sam_record.pos );
85+ recordPtr->SetMAPQ (sam_record.mapq );
86+ recordPtr->SetCIGAR (sam_record.cigar .c_str ());
87+ recordPtr->SetREFNEXT (sam_record.rnext .c_str ());
88+ recordPtr->SetPNEXT (sam_record.pnext );
89+ recordPtr->SetTLEN (sam_record.tlen );
90+ recordPtr->SetSEQ (sam_record.seq .c_str ());
91+ recordPtr->SetQUAL (sam_record.qual .c_str ());
92+
93+ recordPtr->ResetNOPT ();
94+ for (const auto &opt : sam_record.optional_fields ) {
95+ recordPtr->SetOPT (opt.c_str ());
96+ }
97+
98+ writer->Fill (*defaultEntry);
99+
100+ if (index && !(sam_record.flag & 0x4 ) && recordPtr->GetREFID () >= 0 ) {
101+ mapped_count++;
102+ bool should_index = false ;
103+
104+ int current_refid = recordPtr->GetREFID ();
105+ int current_pos = recordPtr->GetPOS () - 1 ;
106+
107+ if (current_refid != last_refid) {
108+ should_index = true ;
109+ last_refid = current_refid;
110+ last_indexed_pos = current_pos;
111+ }
112+
113+ else if (current_pos - last_indexed_pos >= POS_INTERVAL) {
114+ should_index = true ;
115+ last_indexed_pos = current_pos;
116+ }
117+
118+ else if (mapped_count % MAPPED_INTERVAL == 0 ) {
119+ should_index = true ;
120+ }
121+
122+ if (should_index) {
123+ RAMNTupleRecord::GetIndex ()->AddItem (current_refid, current_pos, record_num);
124+ }
125+ }
126+ };
127+
128+ if (!parser.ParseFile (datafile, header_callback, record_callback)) {
129+ printf (" Failed to parse SAM file %s\n " , datafile);
130+ return ;
131+ }
132+
133+ writer.reset ();
134+
135+ if (index) {
136+ RAMNTupleRecord::WriteIndex (*rootFile);
137+ }
138+ RAMNTupleRecord::WriteAllRefs (*rootFile);
139+
140+ headers.Write ();
141+ rootFile->Close ();
142+
143+ (void )mapped_count;
144+ (void )stopwatch;
148145}
149146void samtoramntuple_split_by_chromosome (const char *datafile, const char *output_prefix, int compression_algorithm,
150147 uint32_t quality_policy, int num_threads,
@@ -301,4 +298,3 @@ void samtoramntuple_split_by_chromosome(const char *datafile, const char *output
301298 }
302299 }
303300}
304-
0 commit comments