@@ -24,8 +24,9 @@ use crate::tokens::{Token, TokenStream};
24
24
/// ```
25
25
///
26
26
/// This processor must be used with a token stream that still retains the
27
- /// original spacing of the source code, because it needs the spacing for
28
- /// determining the original indentation of the comment. For example:
27
+ /// original spacing of the source code (but with tabs replaced by spaces),
28
+ /// because it needs the spacing for determining the original indentation
29
+ /// of the comment. For example:
29
30
///
30
31
/// ```text
31
32
/// rule test {
73
74
start_of_input : bool ,
74
75
end_of_input : bool ,
75
76
indentation : usize ,
77
+ tab_size : usize ,
76
78
}
77
79
78
80
/// States used in [`CommentProcessor::process_input_buffer`]
@@ -103,9 +105,18 @@ where
103
105
start_of_input : true ,
104
106
end_of_input : false ,
105
107
indentation : 0 ,
108
+ tab_size : 4 ,
106
109
}
107
110
}
108
111
112
+ /// Number of spaces in a tab.
113
+ ///
114
+ /// The default is `4`.
115
+ pub fn tab_size ( mut self , n : usize ) -> Self {
116
+ self . tab_size = n;
117
+ self
118
+ }
119
+
109
120
fn push_comment (
110
121
& mut self ,
111
122
comment_lines : Vec < Vec < u8 > > ,
@@ -154,7 +165,11 @@ where
154
165
State :: PreComment { leading_newline } => {
155
166
match self . input_buffer . pop_front ( ) {
156
167
Some ( token @ Token :: Whitespace ) => {
157
- self . indentation += token. len ( ) ;
168
+ self . indentation += 1 ;
169
+ self . output_buffer . push_back ( token) ;
170
+ }
171
+ Some ( token @ Token :: Tab ) => {
172
+ self . indentation += self . tab_size ;
158
173
self . output_buffer . push_back ( token) ;
159
174
}
160
175
// A newline has been found while in PreComment state,
@@ -174,6 +189,7 @@ where
174
189
lines : split_comment_lines (
175
190
comment,
176
191
self . indentation ,
192
+ self . tab_size ,
177
193
) ,
178
194
} ;
179
195
self . indentation += token. len ( ) ;
@@ -189,8 +205,11 @@ where
189
205
leading_newline,
190
206
indentation,
191
207
} => match self . input_buffer . pop_front ( ) {
192
- Some ( token @ Token :: Whitespace ) => {
193
- self . indentation += token. len ( ) ;
208
+ Some ( Token :: Whitespace ) => {
209
+ self . indentation += 1 ;
210
+ }
211
+ Some ( Token :: Tab ) => {
212
+ self . indentation += self . tab_size ;
194
213
}
195
214
// Newline found while in the Comment state. If this is the
196
215
// first newline after the comment, the trailing_newline
@@ -241,8 +260,12 @@ where
241
260
Some ( Token :: Comment ( comment) ) => {
242
261
if * indentation == self . indentation {
243
262
lines. append (
244
- split_comment_lines ( comment, * indentation)
245
- . as_mut ( ) ,
263
+ split_comment_lines (
264
+ comment,
265
+ * indentation,
266
+ self . tab_size ,
267
+ )
268
+ . as_mut ( ) ,
246
269
) ;
247
270
* trailing_newline = false ;
248
271
} else {
@@ -258,6 +281,7 @@ where
258
281
lines : split_comment_lines (
259
282
comment,
260
283
self . indentation ,
284
+ self . tab_size ,
261
285
) ,
262
286
} ;
263
287
}
@@ -331,7 +355,7 @@ where
331
355
/// Splits a multi-line comment into lines.
332
356
///
333
357
/// Also removes the specified number of whitespaces from the beginning of
334
- /// each line, except the first one .
358
+ /// each line.
335
359
///
336
360
/// This is necessary because when a multi-line comment that uses the
337
361
/// `/* comment */` syntax is indented, the comment itself contains some spaces
@@ -346,16 +370,31 @@ where
346
370
/// Notice how the comment contains some spaces (here represented by
347
371
/// `<-- indentation -->`) that should be removed/adjusted when the comment
348
372
/// is re-indented.
349
///
/// `comment` is the raw text of the comment, `indentation` is the number of
/// columns of leading whitespace to remove from each line, and `tab_size` is
/// the number of columns a tab character accounts for.
///
/// Lines are terminated by `\n` or `\r\n`; terminators are not included in
/// the returned lines. A line that has less leading whitespace than
/// `indentation` loses only the whitespace it has, and a line made up
/// entirely of indentation whitespace becomes an empty line.
fn split_comment_lines(
    comment: &[u8],
    indentation: usize,
    tab_size: usize,
) -> Vec<Vec<u8>> {
    comment
        .split_inclusive(|&byte| byte == b'\n')
        .map(|raw| {
            // Drop the `\n` terminator, and the `\r` preceding it if any. A
            // `\r` that is not followed by `\n` is part of the line.
            let line = raw
                .strip_suffix(b"\n")
                .map_or(raw, |l| l.strip_suffix(b"\r").unwrap_or(l));
            strip_indentation(line, indentation, tab_size).to_vec()
        })
        .collect()
}

/// Returns `line` without its leading whitespace, removing at most
/// `indentation` columns (a tab counts as `tab_size` columns).
fn strip_indentation(
    line: &[u8],
    indentation: usize,
    tab_size: usize,
) -> &[u8] {
    let mut columns: usize = 0;
    // Spaces and tabs are single-byte ASCII characters, so scanning bytes
    // is enough; any other byte marks the start of the comment's text.
    for (pos, byte) in line.iter().enumerate() {
        if columns >= indentation {
            return &line[pos..];
        }
        match byte {
            b' ' => columns += 1,
            b'\t' => columns = columns.saturating_add(tab_size),
            _ => return &line[pos..],
        }
    }
    // The whole line was consumed as indentation (or it was empty), there
    // is nothing left to keep.
    &[]
}
0 commit comments