Skip to content

Commit 59e7963

Browse files
Copilot and Boshen committed
Implement advanced performance optimizations with significant speedups
Co-authored-by: Boshen <[email protected]>
1 parent d8d5d18 commit 59e7963

File tree

1 file changed

+45
-30
lines changed

1 file changed

+45
-30
lines changed

src/lib.rs

Lines changed: 45 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -31,15 +31,14 @@ use std::io::{ErrorKind, Read, Result};
3131
enum State {
3232
Top,
3333
InString,
34-
StringEscape,
3534
InComment,
3635
InBlockComment,
3736
MaybeCommentEnd,
3837
InLineComment,
3938
}
4039

4140
use State::{
42-
InBlockComment, InComment, InLineComment, InString, MaybeCommentEnd, StringEscape, Top,
41+
InBlockComment, InComment, InLineComment, InString, MaybeCommentEnd, Top,
4342
};
4443

4544
/// A [`Read`] that transforms another [`Read`] so that it changes all comments to spaces so that a downstream json parser
@@ -159,24 +158,15 @@ pub fn strip(s: &mut str) -> Result<()> {
159158
/// Fast check if the input contains any comment-starting characters
160159
#[inline]
161160
fn has_potential_comments(s: &[u8], settings: CommentSettings) -> bool {
162-
// Combine all potential comment starting characters into one search
163-
let mut chars_to_find = Vec::with_capacity(2);
164-
165-
if settings.block_comments || settings.slash_line_comments {
166-
chars_to_find.push(b'/');
167-
}
168-
if settings.hash_line_comments {
169-
chars_to_find.push(b'#');
170-
}
171-
172-
if chars_to_find.is_empty() {
173-
return false;
174-
}
175-
176-
if chars_to_find.len() == 1 {
177-
memchr::memchr(chars_to_find[0], s).is_some()
178-
} else {
179-
memchr::memchr2(chars_to_find[0], chars_to_find[1], s).is_some()
161+
// Use the most efficient memchr variant based on enabled comment types
162+
match (
163+
settings.block_comments || settings.slash_line_comments,
164+
settings.hash_line_comments,
165+
) {
166+
(true, true) => memchr::memchr2(b'/', b'#', s).is_some(),
167+
(true, false) => memchr::memchr(b'/', s).is_some(),
168+
(false, true) => memchr::memchr(b'#', s).is_some(),
169+
(false, false) => false,
180170
}
181171
}
182172

@@ -336,8 +326,7 @@ fn consume_comment_whitespace_until_maybe_bracket(
336326
}
337327
return Ok(*c == b'}' || *c == b']');
338328
}
339-
InString => in_string(*c),
340-
StringEscape => InString,
329+
InString => skip_string_content(buf, i),
341330
InComment => in_comment(c, settings)?,
342331
InBlockComment => consume_block_comments(buf, i),
343332
MaybeCommentEnd => maybe_comment_end(c),
@@ -348,12 +337,13 @@ fn consume_comment_whitespace_until_maybe_bracket(
348337
Ok(false)
349338
}
350339

340+
351341
fn strip_buf(state: &mut State, buf: &mut [u8], settings: CommentSettings) -> Result<()> {
352342
let mut i = 0;
353343
let len = buf.len();
354344
while i < len {
355-
let c = &mut buf[i];
356345
if matches!(state, Top) {
346+
let c = &mut buf[i];
357347
let cur = i;
358348
*state = top(c, settings);
359349
if settings.trailing_commas
@@ -363,10 +353,13 @@ fn strip_buf(state: &mut State, buf: &mut [u8], settings: CommentSettings) -> Re
363353
buf[cur] = b' ';
364354
}
365355
} else {
356+
let c = &mut buf[i];
366357
*state = match state {
367358
Top => unreachable!(),
368-
InString => in_string(*c),
369-
StringEscape => InString,
359+
InString => {
360+
// Use optimized string skipping for long strings
361+
skip_string_content(buf, &mut i)
362+
}
370363
InComment => in_comment(c, settings)?,
371364
InBlockComment => consume_block_comments(buf, &mut i),
372365
MaybeCommentEnd => maybe_comment_end(c),
@@ -430,12 +423,34 @@ fn top(c: &mut u8, settings: CommentSettings) -> State {
430423
}
431424
}
432425

426+
427+
/// Optimized string content skipping using memchr
433428
#[inline]
434-
fn in_string(c: u8) -> State {
435-
match c {
436-
b'"' => Top,
437-
b'\\' => StringEscape,
438-
_ => InString,
429+
fn skip_string_content(buf: &[u8], i: &mut usize) -> State {
430+
// Find the next quote or backslash
431+
match memchr::memchr2(b'"', b'\\', &buf[*i..]) {
432+
Some(offset) => {
433+
*i += offset;
434+
match buf[*i] {
435+
b'"' => {
436+
// Found closing quote
437+
Top
438+
}
439+
b'\\' => {
440+
// Found escape, skip next character if it exists
441+
if *i + 1 < buf.len() {
442+
*i += 1;
443+
}
444+
InString
445+
}
446+
_ => unreachable!(),
447+
}
448+
}
449+
None => {
450+
// No more quotes or escapes in this buffer
451+
*i = buf.len() - 1;
452+
InString
453+
}
439454
}
440455
}
441456

0 commit comments

Comments
 (0)