@@ -31,15 +31,14 @@ use std::io::{ErrorKind, Read, Result};
31
31
/// Position of the comment-stripping state machine.
///
/// `strip_buf` receives the current value as `&mut State` and overwrites it after
/// each step, so the state outlives a single call.
enum State {
    /// Bytes seen in this state are dispatched through `top`.
    Top,
    /// Inside a string; `skip_string_content` handles this state and returns to
    /// `Top` once it reaches a `"`.
    InString,
    /// Handled by `in_comment`.
    /// NOTE(review): presumably entered right after a comment opener — confirm in `top`.
    InComment,
    /// Handled by `consume_block_comments`.
    InBlockComment,
    /// Handled by `maybe_comment_end`.
    MaybeCommentEnd,
    /// NOTE(review): the handler for this state is outside the visible excerpt;
    /// presumably a line comment running to the end of the line — confirm.
    InLineComment,
}
40
39
41
40
use State :: {
42
- InBlockComment , InComment , InLineComment , InString , MaybeCommentEnd , StringEscape , Top ,
41
+ InBlockComment , InComment , InLineComment , InString , MaybeCommentEnd , Top ,
43
42
} ;
44
43
45
44
/// A [`Read`] that transforms another [`Read`] so that it changes all comments to spaces so that a downstream json parser
@@ -159,24 +158,15 @@ pub fn strip(s: &mut str) -> Result<()> {
159
158
/// Fast check if the input contains any comment-starting characters
160
159
#[ inline]
161
160
fn has_potential_comments ( s : & [ u8 ] , settings : CommentSettings ) -> bool {
162
- // Combine all potential comment starting characters into one search
163
- let mut chars_to_find = Vec :: with_capacity ( 2 ) ;
164
-
165
- if settings. block_comments || settings. slash_line_comments {
166
- chars_to_find. push ( b'/' ) ;
167
- }
168
- if settings. hash_line_comments {
169
- chars_to_find. push ( b'#' ) ;
170
- }
171
-
172
- if chars_to_find. is_empty ( ) {
173
- return false ;
174
- }
175
-
176
- if chars_to_find. len ( ) == 1 {
177
- memchr:: memchr ( chars_to_find[ 0 ] , s) . is_some ( )
178
- } else {
179
- memchr:: memchr2 ( chars_to_find[ 0 ] , chars_to_find[ 1 ] , s) . is_some ( )
161
+ // Use the most efficient memchr variant based on enabled comment types
162
+ match (
163
+ settings. block_comments || settings. slash_line_comments ,
164
+ settings. hash_line_comments ,
165
+ ) {
166
+ ( true , true ) => memchr:: memchr2 ( b'/' , b'#' , s) . is_some ( ) ,
167
+ ( true , false ) => memchr:: memchr ( b'/' , s) . is_some ( ) ,
168
+ ( false , true ) => memchr:: memchr ( b'#' , s) . is_some ( ) ,
169
+ ( false , false ) => false ,
180
170
}
181
171
}
182
172
@@ -336,8 +326,7 @@ fn consume_comment_whitespace_until_maybe_bracket(
336
326
}
337
327
return Ok ( * c == b'}' || * c == b']' ) ;
338
328
}
339
- InString => in_string ( * c) ,
340
- StringEscape => InString ,
329
+ InString => skip_string_content ( buf, i) ,
341
330
InComment => in_comment ( c, settings) ?,
342
331
InBlockComment => consume_block_comments ( buf, i) ,
343
332
MaybeCommentEnd => maybe_comment_end ( c) ,
@@ -348,12 +337,13 @@ fn consume_comment_whitespace_until_maybe_bracket(
348
337
Ok ( false )
349
338
}
350
339
340
+
351
341
fn strip_buf ( state : & mut State , buf : & mut [ u8 ] , settings : CommentSettings ) -> Result < ( ) > {
352
342
let mut i = 0 ;
353
343
let len = buf. len ( ) ;
354
344
while i < len {
355
- let c = & mut buf[ i] ;
356
345
if matches ! ( state, Top ) {
346
+ let c = & mut buf[ i] ;
357
347
let cur = i;
358
348
* state = top ( c, settings) ;
359
349
if settings. trailing_commas
@@ -363,10 +353,13 @@ fn strip_buf(state: &mut State, buf: &mut [u8], settings: CommentSettings) -> Re
363
353
buf[ cur] = b' ' ;
364
354
}
365
355
} else {
356
+ let c = & mut buf[ i] ;
366
357
* state = match state {
367
358
Top => unreachable ! ( ) ,
368
- InString => in_string ( * c) ,
369
- StringEscape => InString ,
359
+ InString => {
360
+ // Use optimized string skipping for long strings
361
+ skip_string_content ( buf, & mut i)
362
+ }
370
363
InComment => in_comment ( c, settings) ?,
371
364
InBlockComment => consume_block_comments ( buf, & mut i) ,
372
365
MaybeCommentEnd => maybe_comment_end ( c) ,
@@ -430,12 +423,34 @@ fn top(c: &mut u8, settings: CommentSettings) -> State {
430
423
}
431
424
}
432
425
426
+
427
+ /// Optimized string content skipping using memchr
433
428
#[ inline]
434
- fn in_string ( c : u8 ) -> State {
435
- match c {
436
- b'"' => Top ,
437
- b'\\' => StringEscape ,
438
- _ => InString ,
429
+ fn skip_string_content ( buf : & [ u8 ] , i : & mut usize ) -> State {
430
+ // Find the next quote or backslash
431
+ match memchr:: memchr2 ( b'"' , b'\\' , & buf[ * i..] ) {
432
+ Some ( offset) => {
433
+ * i += offset;
434
+ match buf[ * i] {
435
+ b'"' => {
436
+ // Found closing quote
437
+ Top
438
+ }
439
+ b'\\' => {
440
+ // Found escape, skip next character if it exists
441
+ if * i + 1 < buf. len ( ) {
442
+ * i += 1 ;
443
+ }
444
+ InString
445
+ }
446
+ _ => unreachable ! ( ) ,
447
+ }
448
+ }
449
+ None => {
450
+ // No more quotes or escapes in this buffer
451
+ * i = buf. len ( ) - 1 ;
452
+ InString
453
+ }
439
454
}
440
455
}
441
456
0 commit comments